How to use the librosa.stft function in librosa

To help you get started, we’ve selected a few examples based on popular ways librosa.stft is used in public projects.

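Before diving into the project excerpts below, here is a minimal, self-contained sketch of the typical call pattern. The signal is synthetic and the parameter values are illustrative; they are not taken from any of the projects shown here.

import numpy as np
import librosa

sr = 22050
t = np.linspace(0, 1.0, sr, endpoint=False)
y = 0.5 * np.sin(2 * np.pi * 440.0 * t)                 # 1 s of a 440 Hz tone

# Complex STFT matrix of shape (1 + n_fft // 2, n_frames)
D = librosa.stft(y, n_fft=2048, hop_length=512, win_length=2048, window='hann')

mag = np.abs(D)                                          # magnitude spectrogram
log_mag = librosa.amplitude_to_db(mag, ref=np.max)       # peak mapped to 0 dB
print(D.shape, log_mag.shape)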

github nanoleaf / aurora-sdk-linux / music_processor.py View on Github
        energy_output = energy.astype(np.uint16)
    else:
        energy_output = np.zeros(2).astype(np.uint16)

    # fft or mel
    if is_fft or is_mel:
        global sample_rate

        # down-sample by 4, with filtering, energy not scaled
        data_np = librosa.resample(data_np,
                                   sample_rate,
                                   sample_rate/4,
                                   res_type='kaiser_fast')

        # short time fft over n_fft samples
        fft_data = librosa.stft(data_np, n_fft,
                                hop_length=n_fft,
                                center=False)

        # calculate FFT or Mel
        if is_fft:
            fft_data_mag = np.abs(fft_data[0:n_fft // 2]) ** 2
            fft_data_mag *= 2**3
            fft_output = get_output_fft_bins(fft_data_mag, n_out_bins)
        else:
            fft_data_mag = np.abs(fft_data)**2
            fft_data_mag *= 2**2
            mel_data = librosa.feature.melspectrogram(S=fft_data_mag, sr=sample_rate / 4, n_mels=n_mel)
            fft_output = get_output_fft_bins(mel_data, n_out_bins)

        # output uint8_t
        fft_output = fft_output.astype(np.uint8)
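
In the excerpt above, hop_length equals n_fft and center=False, so the frames do not overlap: each STFT column covers a disjoint block of n_fft samples. A quick, self-contained way to see this (the values are illustrative, not the Aurora SDK's):

import numpy as np
import librosa

n_fft = 256
y = np.random.randn(4 * n_fft).astype(np.float32)       # exactly 4 non-overlapping blocks

D = librosa.stft(y, n_fft=n_fft, hop_length=n_fft, center=False)
print(D.shape)                                           # (1 + n_fft // 2, 4): one column per block
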
github GianlucaPaolocci / Sound-classification-on-Raspberry-Pi-with-Tensorflow / classiPi.py View on Github
def extract_features():
    X = sounddevice.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sounddevice.wait()
    X = np.squeeze(X)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(y=X, sr=sample_rate).T)
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    return ext_features  # the excerpt stacked this onto a `features` accumulator not defined in this snippet
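
As a rough sanity check, the individual feature matrices can be reproduced on a synthetic signal; sample_rate = 22050 is an assumption here, since the excerpt takes it from module scope.

import numpy as np
import librosa

sample_rate = 22050
X = np.random.randn(sample_rate).astype(np.float32)      # 1 s of noise in place of a recording
stft = np.abs(librosa.stft(X))

print(stft.shape)                                                     # (1025, 44)
print(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T.shape)    # (44, 8)
print(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T.shape)    # (44, 12)
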
github pkmital / time-domain-neural-audio-style-transfer / audio_style_transfer / models / uylanov.py View on Github
def read_audio_spectum(filename, n_fft=2048, hop_length=512, sr=22050):
    x, sr = librosa.load(filename, sr=sr)
    S = librosa.stft(x, n_fft, hop_length)
    S = np.log1p(np.abs(S)).T
    return S, sr
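
Passing n_fft and hop_length positionally works on older librosa releases; newer versions (0.10 and later) make everything after the signal keyword-only. A version-proof variant of the same function might look like this (the function name here is hypothetical):

import numpy as np
import librosa

def read_audio_spectrum_kw(filename, n_fft=2048, hop_length=512, sr=22050):
    # Same computation as above, but with explicit keyword arguments.
    x, sr = librosa.load(filename, sr=sr)
    S = np.log1p(np.abs(librosa.stft(x, n_fft=n_fft, hop_length=hop_length))).T
    return S, sr
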
github omerktz / TraFix / open_nmt / onmt / inputters / audio_dataset.py View on Github
        assert sample_rate_ == sample_rate, \
            'Sample rate of %s != -sample_rate (%d vs %d)' \
            % (audio_path, sample_rate_, sample_rate)

        sound = sound.numpy()
        if len(sound.shape) > 1:
            if sound.shape[1] == 1:
                sound = sound.squeeze()
            else:
                sound = sound.mean(axis=1)  # average multiple channels

        n_fft = int(sample_rate * window_size)
        win_length = n_fft
        hop_length = int(sample_rate * window_stride)
        # STFT
        d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                         win_length=win_length, window=window)
        spect, _ = librosa.magphase(d)
        spect = np.log1p(spect)
        spect = torch.FloatTensor(spect)
        if normalize_audio:
            mean = spect.mean()
            std = spect.std()
            spect.add_(-mean)
            spect.div_(std)
        return spect
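
Here n_fft and hop_length are derived from window sizes expressed in seconds. A self-contained sketch of the same pipeline with illustrative values (a 16 kHz signal, 20 ms window, 10 ms stride, and a Hamming window; these are assumptions, not the project's defaults):

import numpy as np
import librosa
import torch

sample_rate = 16000
window_size, window_stride = 0.02, 0.01                  # seconds (illustrative)
n_fft = int(sample_rate * window_size)                    # 320 samples
hop_length = int(sample_rate * window_stride)             # 160 samples

sound = np.random.randn(sample_rate).astype(np.float32)   # 1 s of noise
d = librosa.stft(sound, n_fft=n_fft, hop_length=hop_length,
                 win_length=n_fft, window='hamming')
spect = torch.FloatTensor(np.log1p(np.abs(d)))
spect = (spect - spect.mean()) / spect.std()              # same normalization as above
print(spect.shape)                                        # (1 + n_fft // 2, n_frames)
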
github fatchord / WaveRNN / notebooks / utils / dsp.py View on Github
def stft(y):
    return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
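
The wrapper relies on n_fft, hop_length and win_length being defined at module level (they come from the notebook's hyperparameters). A self-contained equivalent with illustrative values would be:

import librosa

# Illustrative settings, not necessarily WaveRNN's actual hyperparameters.
n_fft, hop_length, win_length = 1024, 256, 1024

def stft(y):
    return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
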
github jjery2243542 / voice_conversion / postprocess / utils.py View on Github
    mag: Magnitude spectrogram.
    phase_angle: Initial condition for phase.
    n_fft: Size of the FFT.
    hop: Stride of FFT. Defaults to n_fft/2.
    num_iters: Griffin-Lim iterations to perform.

  Returns:
    audio: 1-D array of float32 sound samples.
  """
  fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
  ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
  complex_specgram = inv_magphase(mag, phase_angle)
  for i in range(num_iters):
    audio = librosa.istft(complex_specgram, **ifft_config)
    if i != num_iters - 1:
      complex_specgram = librosa.stft(audio, **fft_config)
      _, phase = librosa.magphase(complex_specgram)
      phase_angle = np.angle(phase)
      complex_specgram = inv_magphase(mag, phase_angle)
  return audio
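
The loop above depends on an inv_magphase helper that the excerpt does not show. Presumably it recombines a magnitude spectrogram and a phase angle into a complex spectrogram, along the lines of:

import numpy as np

def inv_magphase(mag, phase_angle):
    # Presumed behaviour: rebuild the complex spectrogram from magnitude and phase angle.
    return mag * np.exp(1j * phase_angle)

Newer librosa releases also provide librosa.griffinlim, which performs the same iteration internally.
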
github espnet / espnet / espnet / transform / spectrogram.py View on Github
def spectrogram(x, n_fft, n_shift,
                win_length=None, window='hann'):
    spc = np.abs(librosa.stft(x, n_fft, n_shift, win_length, window=window)).T
    return spc
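
A hypothetical call with illustrative values; the trailing transpose puts the frame axis first. Note that the positional arguments inside the function assume an older librosa release, since newer versions expect keyword arguments.

import numpy as np

x = np.random.randn(16000).astype(np.float32)
spc = spectrogram(x, n_fft=512, n_shift=128)
print(spc.shape)                                          # (n_frames, 1 + n_fft // 2) == (n_frames, 257)
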
github andabi / deep-voice-conversion / tools / plot_spectrogram.py View on Github
filename = '/Users/avin/git/vc/outputs/male.wav'
sr = 22050
n_fft = 1024
len_hop = n_fft // 4  # integer division: hop_length must be an int
plot_wav = False
plot_spec = True

# Waveforms
wav = read(filename, sr, mono=True)
# wav = np.where(wav == 0, 1000, wav)
# wav = np.zeros_like(wav)
# wav[0] = np.ones_like(wav[0])

# Spectrogram
spec = librosa.stft(wav, n_fft=n_fft, hop_length=len_hop)

# Plot waveforms
if plot_wav:
    plt.figure(1)

    librosa.display.waveplot(wav, sr=sr, color='b')
    plt.title('waveform')

    plt.tight_layout()
    plt.show()

# Plot spectrogram
if plot_spec:
    plt.figure(2)

    librosa.display.specshow(librosa.amplitude_to_db(np.abs(spec), ref=np.max), sr=sr, hop_length=len_hop, y_axis='linear', x_axis='time')
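
One detail worth knowing about the plotting call: with ref=np.max, librosa.amplitude_to_db puts the loudest bin at 0 dB and everything else below it, which makes the colour scale comparable across files. A quick illustrative check:

import numpy as np
import librosa

spec = np.abs(librosa.stft(np.random.randn(22050).astype(np.float32), n_fft=1024))
db = librosa.amplitude_to_db(spec, ref=np.max)
print(db.max())                                           # 0.0: the peak is the reference
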
github nihal111 / voice-conversion / convert.py View on Github
def _get_mfcc_log_spec_and_log_mel_spec(wav, preemphasis_coeff, n_fft, win_length, hop_length):
    '''
    Args:
    wav - Wave object loaded using librosa

    Returns:
    mfcc - coefficients
    mag - magnitude spectrum
    mel
    '''
    # Pre-emphasis
    y_preem = preemphasis(wav, coeff=preemphasis_coeff)

    # Get spectrogram
    D = librosa.stft(y=y_preem, n_fft=n_fft,
                     hop_length=hop_length, win_length=win_length)
    mag = np.abs(D)

    # Get mel-spectrogram
    mel_basis = librosa.filters.mel(
        hp.Default.sr, hp.Default.n_fft, hp.Default.n_mels)  # (n_mels, 1+n_fft//2)
    mel = np.dot(mel_basis, mag)  # (n_mels, t) # mel spectrogram

    # Get mfccs
    db = librosa.amplitude_to_db(mel)
    mfccs = np.dot(librosa.filters.dct(hp.Default.n_mfcc, db.shape[0]), db)
    # Log
    mag = np.log(mag + sys.float_info.epsilon)
    mel = np.log(mel + sys.float_info.epsilon)

    # Normalization
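
The shapes noted in the comments above can be checked directly; the settings below are illustrative stand-ins for hp.Default:

import numpy as np
import librosa

sr, n_fft, n_mels = 16000, 512, 80                        # stand-ins for hp.Default.*
mag = np.abs(librosa.stft(np.random.randn(sr).astype(np.float32), n_fft=n_fft))
mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)

print(mel_basis.shape)                                    # (n_mels, 1 + n_fft // 2) == (80, 257)
print(np.dot(mel_basis, mag).shape)                       # (n_mels, n_frames)
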
github YuriyGuts / dechorder / data / rendered / featurize.py View on Github
def featurize_file(audio_filename, label_filename):
    df_labels = pd.read_csv(label_filename)
    y, sr = librosa.load(audio_filename)
    spectrogram = np.abs(librosa.stft(y))
    chroma = librosa.feature.chroma_stft(S=spectrogram, sr=sr)

    file_duration = len(y) / sr
    chroma_per_second = chroma.shape[1] / file_duration

    features = []
    for idx, seconds_start, seconds_end, label in df_labels.itertuples():
        chroma_start_idx = int(np.round(seconds_start * chroma_per_second))
        chroma_end_idx = int(np.round(seconds_end * chroma_per_second))
        chroma_segment = chroma[:, chroma_start_idx:chroma_end_idx]
        features.append(featurize_audio_segment(chroma_segment))

    note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    feature_names = [f'chroma-{note}' for note in note_names]
    df = pd.DataFrame(features, columns=feature_names)
    df['chord'] = df_labels['chord']
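
Because chroma_stft inherits the STFT's default hop of 512 samples, chroma_per_second works out to roughly sr / 512 (about 43 frames per second at librosa's default 22,050 Hz load rate). A quick check on a synthetic signal:

import numpy as np
import librosa

sr = 22050
y = np.random.randn(5 * sr).astype(np.float32)            # 5 s of noise
chroma = librosa.feature.chroma_stft(S=np.abs(librosa.stft(y)), sr=sr)

print(chroma.shape[0])                                    # 12 pitch classes
print(chroma.shape[1] / (len(y) / sr))                    # ~43 chroma frames per second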