How to use the librosa.core.load function in librosa

To help you get started, we’ve selected a few librosa.core.load examples, based on popular ways it is used in public projects.
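At its core, librosa.core.load reads an audio file from disk and returns a tuple of the waveform (a NumPy float array) and the sampling rate it was loaded at. A minimal sketch, with a placeholder file path:

import librosa

# By default librosa resamples to 22050 Hz and downmixes to mono.
y, sr = librosa.core.load("example.wav")  # placeholder path
print(y.shape, sr)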


github 4p0pt0Z / Audio_blind_source_separation / generate_audioset_segments.py
    mel_spectrograms = collections.deque()
    stft_magnitudes = collections.deque()
    stft_phases = collections.deque()
    segment_names = collections.deque()

    # Mel filterbank matrix for computing the mel spectrograms
    mel_filterbank = librosa.filters.mel(config["sampling_rate"],
                                         n_fft=STFT_frame_n_samples,
                                         n_mels=config["n_Mel_filters"],
                                         fmin=config["Mel_min_freq"],
                                         fmax=config["Mel_max_freq"])

    # Loop over all the 10-second-long audio files.
    for idx, (audio_file, label_file) in enumerate(zip(all_wavs_filenames, all_labels_filenames)):
        try:  # read file audio data and parse the label file. If this fails, continue to the next file
            audio, _ = librosa.core.load(audio_file, sr=config["sampling_rate"], mono=True)
            labels_segment = parse_label_file(label_file, classes)
            if len(labels_segment) != len(classes):
                raise ValueError(
                    'Length of labels_segment is ' + str(len(labels_segment)) + ' while there are only ' + str(
                        len(classes)) + ' classes.')
        except Exception as e:
            print(e)
            print(audio_file)
            continue

        # Split the audio into segments
        n_seg_in_audio = audio.shape[0] // segment_n_samples
        audio = audio[:n_seg_in_audio * segment_n_samples]
        segments = np.split(audio, n_seg_in_audio)

        # For all segments, add white noise if needed, compute audio features and store them in queues,
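The key load parameters here are sr=config["sampling_rate"], which resamples the audio to the requested rate, and mono=True, which downmixes multi-channel files. A condensed sketch of the load-then-segment pattern, with hypothetical names and a placeholder path:

import librosa
import numpy as np

target_sr = 16000              # hypothetical target rate
segment_n_samples = target_sr  # hypothetical 1-second segments
audio, _ = librosa.core.load("clip.wav", sr=target_sr, mono=True)
n_seg = audio.shape[0] // segment_n_samples
# Trim the tail so the waveform splits into equal-length segments.
segments = np.split(audio[:n_seg * segment_n_samples], n_seg) if n_seg else []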
github ynop / audiomate / tests / tracks / test_file.py
    def test_read_samples_range(self, name, audio_path):
        audio_path = os.path.join(audio_path, name)
        file_obj = tracks.FileTrack('some_idx', audio_path)

        expected, __ = librosa.core.load(audio_path, sr=None, mono=True,
                                         offset=1.0, duration=1.7)
        actual = file_obj.read_samples(offset=1.0, duration=1.7)

        assert np.array_equal(actual, expected)
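Here sr=None keeps the file's native sampling rate instead of resampling, while offset and duration (both in seconds) load only a slice of the file. A sketch with placeholder path and values:

import librosa

full, sr = librosa.core.load("speech.wav", sr=None)                           # whole file, native rate
part, _ = librosa.core.load("speech.wav", sr=None, offset=1.0, duration=1.7)  # the 1.0 s to 2.7 s slice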
github Hiroshiba / become-yukarin / become_yukarin / dataset / dataset.py
    def __call__(self, data: str, test=None):
        wave = librosa.core.load(data, sr=self._sample_rate, dtype=self._dtype)[0]
        if self._top_db is not None:
            wave = librosa.effects.remix(wave, intervals=librosa.effects.split(wave, top_db=self._top_db))
        if self._pad_second > 0.0:
            p = int(self._sample_rate * self._pad_second)
            wave = numpy.pad(wave, pad_width=(p, p), mode='constant')
        return Wave(wave, self._sample_rate)
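librosa.effects.split finds the non-silent (start, end) sample intervals relative to a top_db threshold, and librosa.effects.remix concatenates just those intervals, which is how this transform strips leading, trailing, and internal silence before padding. A standalone sketch, with placeholder path and values:

import librosa
import numpy

wave, sr = librosa.core.load("voice.wav", sr=24000)       # placeholder path and rate
intervals = librosa.effects.split(wave, top_db=60)        # non-silent (start, end) pairs
wave = librosa.effects.remix(wave, intervals=intervals)   # drop the silent stretches
p = int(sr * 0.1)                                         # hypothetical 0.1 s of padding
wave = numpy.pad(wave, pad_width=(p, p), mode='constant')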
github castorini / honkling / preprocessing.py
def preprocess(config, example, timeshift=True, silence=False):
    if silence:
        example = "__silence__"

    in_len = config["input_length"]
    if silence:
        data = np.zeros(in_len, dtype=np.float32)
    else:
        data = librosa.core.load(example, sample_rate)[0]

    print_data('loaded data', data)

    data = np.pad(data, (0, max(0, in_len - len(data))), "constant")
    print_data('padded data', data)

    if timeshift:
        data = timeshift_audio(config, data)

    print_data('shifted data', data)

    data = preprocess_audio(data, config)

    print_data('preprocessed data', data)

    # data = torch.from_numpy(data);
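Note that sample_rate is passed positionally here: sr is the second positional parameter of librosa.core.load, so the call is equivalent to the keyword form. A quick illustration with a placeholder path:

import librosa

data = librosa.core.load("command.wav", 16000)[0]     # second positional argument is sr
same = librosa.core.load("command.wav", sr=16000)[0]  # equivalent keyword form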
github mozilla / TTS / datasets / TWEB.py
    def load_wav(self, filename):
        try:
            audio = librosa.core.load(filename, sr=self.sample_rate)
            return audio
        except RuntimeError as e:
            print(" !! Cannot read file : {}".format(filename))
github MTG / WGANSing / prep_data_nus.py
    singers = next(os.walk(config.wav_dir_nus))[1]

    for singer in singers:
        sing_dir = config.wav_dir_nus+singer+'/sing/'
        read_dir = config.wav_dir_nus+singer+'/read/'
        sing_wav_files=[x for x in os.listdir(sing_dir) if x.endswith('.wav') and not x.startswith('.')]

        count = 0

        print ("Processing singer %s" % singer)
        for lf in sing_wav_files:

            audio, fs = librosa.core.load(os.path.join(sing_dir,lf), sr=config.fs)

            audio = np.float64(audio)

            if len(audio.shape) == 2:

                vocals = np.array((audio[:,1]+audio[:,0])/2)

            else: 
                vocals = np.array(audio)

            voc_stft = abs(utils.stft(vocals))

            out_feats = utils.stft_to_feats(vocals,fs)

            strings_p = process_lab_file(os.path.join(sing_dir,lf[:-4]+'.txt'), len(voc_stft))
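One subtlety: librosa.core.load defaults to mono=True, which always returns a 1-D array, so the stereo branch above would only run if the file were loaded with mono=False; in that case librosa puts the channel axis first. A sketch of loading with channels preserved and downmixing by hand (placeholder path):

import librosa
import numpy as np

audio, fs = librosa.core.load("song.wav", sr=44100, mono=False)  # keep channels
if audio.ndim == 2:                              # stereo: shape (2, n_samples)
    vocals = np.array((audio[1] + audio[0]) / 2)  # average the two channels
else:
    vocals = np.array(audio)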
github yweweler / single-speaker-tts / audio / io.py
used. Defaults to None.

        offset (:obj:`float`, optional):
            Offset to start loading the file at (in seconds).
            Defaults to 0.0.

        duration (:obj:`float`, optional):
            Only load up to this much audio (in seconds). When None is used,
            the file is loaded from `offset` to the end.
            Defaults to None.

    Returns:
        (np.ndarray, int):
            A tuple consisting of the audio time series and the sampling rate used for loading.
    """
    return librosa.core.load(wav_path, sr=sampling_rate, offset=offset, duration=duration)
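The function's name and the start of its signature are cut off in this excerpt; judging from the return statement it takes wav_path, sampling_rate, offset, and duration and forwards them straight to librosa.core.load. A hypothetical call, assuming the wrapper is named load_wav:

wav, sr = load_wav("sample.wav", sampling_rate=22050, offset=0.0, duration=None)  # hypothetical name and values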
github emilio-molina / audio_degrader / audio_degrader / DegradationMix.py
Args:
            audio_file (AudioFile): Input AudioFile
        Returns:
            (np.array): Samples of noise with shape (2, nsamples)
        """
        sample_rate = audio_file.sample_rate
        extra_tmp_path = audio_file.tmp_path + '.extra.wav'
        cmd = "ffmpeg -y -i {0} -ar {1} -ac 2 -acodec pcm_f32le {2}".format(
                noise_path,
                sample_rate,
                extra_tmp_path)
        out, err, returncode = run(cmd)
        logging.debug(out)
        logging.debug(err)
        aux_x_noise, sr = lr.core.load(extra_tmp_path, sr=None, mono=False)
        assert sr == sample_rate
        os.remove(extra_tmp_path)
        if len(aux_x_noise.shape) == 1:
            noise_samples = np.zeros((2, len(aux_x_noise)))
            noise_samples[0, :] = aux_x_noise
            noise_samples[1, :] = aux_x_noise
        else:
            noise_samples = aux_x_noise
        return noise_samples
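With sr=None and mono=False the file is loaded at its native rate with channels preserved: librosa returns a 1-D array for mono files and a (2, nsamples) array for stereo, which is why the mono case is duplicated into two identical channels above. A condensed sketch using the same lr alias (placeholder path):

import numpy as np
import librosa as lr

x, sr = lr.core.load("noise.wav", sr=None, mono=False)
if x.ndim == 1:            # mono file: replicate into two channels
    x = np.vstack([x, x])  # shape (2, nsamples)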
github mostafaelaraby / Tensorflow-Keyword-Spotting / utils.py
def load_audio_file(file_path,sample_rate):
    input_length = sample_rate
    data = librosa.core.load(file_path, sr=sample_rate)[0]  # , sr=16000
    if len(data) > input_length:
        data = data[:input_length]
    else:
        data = np.pad(data, (0, max(0, input_length - len(data))), "constant")
    return data
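Since input_length equals sample_rate, this helper always returns exactly one second of audio: longer clips are truncated and shorter ones zero-padded. A usage sketch (placeholder path):

clip = load_audio_file("yes.wav", sample_rate=16000)
assert len(clip) == 16000  # exactly one second at 16 kHz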
github hmartelb / Pix2Pix-Timbre-Transfer / code / data.py
def load_audio(filename, sr=44100):
    return librosa.core.load(filename, sr=sr)[0]
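Discarding the returned sampling rate with [0] is safe here because sr=44100 pins the rate at load time. A usage sketch (placeholder paths):

y = load_audio("piano.wav")             # mono waveform at 44.1 kHz
y8k = load_audio("piano.wav", sr=8000)  # or resample on load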