# 2. Load the audio as a waveform `y`
# Store the sampling rate as `sr`
import librosa
import numpy as np

# read the list of songs
arq = open('/home/douglas/Música/musicas/wav/tristes/tristes.txt', 'r')
lines = arq.readlines()
arq.close()

lista = []
count = 0
for l in lines:
    # loading the files
    music, erro = l.split("\n", 1)
    # PLEASE CHECK THE PATH
    y, sr = librosa.load('/home/douglas/Música/musicas/wav/tristes/' + music, sr=44100)
    S = np.abs(librosa.core.stft(y, n_fft=2048, hop_length=512, win_length=1024, window='hann'))
    contrast = librosa.feature.spectral_contrast(S=S)
    print(music, contrast.mean())
    lista.append(contrast.mean())

arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv', 'r')
musics = arq.readlines()
arq.close()

count = 0
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv', 'w')
for m in musics:
    music, erro = m.split("\n", 1)
    arq.write(music + "," + str(lista[count]) + "\n")
    count += 1
arq.close()
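For comparison, the per-track feature above can be condensed into a small helper; this is a minimal sketch (the function name and placeholder path are mine, not the original code's):

import librosa
import numpy as np

# Hypothetical helper: mean spectral contrast of one track.
def mean_spectral_contrast(path, sr=44100):
    y, sr = librosa.load(path, sr=sr)
    S = np.abs(librosa.stft(y, n_fft=2048, hop_length=512))
    return librosa.feature.spectral_contrast(S=S, sr=sr).mean()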
def get_audio(self):
    temp_file_name = "temp_" + self.url.replace('_', '-')
    self.video.streams.first().download(filename=temp_file_name)
    if not os.path.isfile(temp_file_name + ".mp4"):
        raise Exception("crawled file is not in mp4 format")
    cmd = FFMPEG_TEMPLATE.format(temp_file_name).split()
    subprocess.check_output(cmd)
    audio_data = librosa.core.load(temp_file_name + ".wav", sr=16000)[0]
    os.remove(temp_file_name + ".mp4")
    os.remove(temp_file_name + ".wav")
    return audio_data
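FFMPEG_TEMPLATE is not shown in this snippet; a plausible definition matching the .mp4-to-.wav conversion and the 16 kHz load above might be (hypothetical, not the original constant):

# Hypothetical reconstruction of FFMPEG_TEMPLATE; the original is not shown.
FFMPEG_TEMPLATE = 'ffmpeg -y -i {0}.mp4 -ar 16000 -ac 1 {0}.wav'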
for s in list(os.walk(files_dir + '/centroid', topdown=False))[-1][-1]:
    if str(t[0]).split('.')[0] == s.split('centroid.ogg')[0]:
        shutil.copy(files_dir + '/centroid/' + s, files_dir + '/centroid/' + str(c) + '/' + s)
        print(t)
try:
    simil_audio = [MonoLoader(filename=files_dir + '/centroid/' + str(c) + '/' + f)()
                   for f in list(os.walk(files_dir + '/centroid/' + str(c), topdown=False))[-1][-1]]
    audio0 = scratch_music(choice(simil_audio))
    audio1 = scratch_music(choice(simil_audio))
    del simil_audio
    audio_N = min([len(i) for i in [audio0, audio1]])
    audio_samples = [i[:audio_N] / i.max() for i in [audio0, audio1]]
    simil_x = np.array(audio_samples).sum(axis=0)
    del audio_samples
    simil_x = 0.5 * simil_x / simil_x.max()
    h, p = librosa.decompose.hpss(librosa.core.stft(simil_x))
    del simil_x, p
    h = librosa.istft(h)
    MonoWriter(filename=files_dir + '/centroid/' + str(c) + '/remix/' + 'similarity_mix_centroid.ogg',
               format='ogg', sampleRate=44100)(h)
    del h
except Exception as e:
    print(e)
    continue
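The heart of the remix step is librosa's harmonic/percussive source separation (HPSS); in isolation it looks like this (a minimal sketch; the bundled example clip assumes librosa >= 0.8):

import librosa

# Minimal HPSS sketch: keep only the harmonic component of a signal.
y, sr = librosa.load(librosa.example('trumpet'))
D = librosa.stft(y)
harmonic, percussive = librosa.decompose.hpss(D)
y_harmonic = librosa.istft(harmonic)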
win_length : int, default=4096
    Window size for the STFT analysis.
theta : float, default=0.15
    Bias on the smoothed signal in the context of the logistic function;
    higher theta reduces envelope activation sensitivity,
    lower theta increases it.

Returns
-------
y_env : ndarray
    A vector specifying the amplitude envelope.
"""
S = librosa.stft(
    y_input, n_fft=win_length, hop_length=win_length,
    win_length=win_length)
S_samples = librosa.core.frames_to_samples(
    range(S.shape[1]), hop_length=win_length)
y_smooth = np.mean(np.abs(S), axis=0)
# normalization (to overall energy)
if np.max(np.abs(y_smooth)) > 0:
    y_smooth = y_smooth / np.max(np.abs(y_smooth))
# binary thresholding for low overall energy events
y_smooth[y_smooth < thresh] = 0
# low-pass filter
b_coeff, a_coeff = scipy.signal.butter(2, lpf_cutoff, 'low')
y_smooth = scipy.signal.filtfilt(b_coeff, a_coeff, y_smooth)
# logistic function to semi-binarize the output; confidence value
y_conf = 1.0 - (1.0 / (1.0 + np.exp(alpha * (y_smooth - theta))))
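In isolation, the smoothing step above is a standard zero-phase Butterworth low-pass; a minimal sketch with a placeholder signal and cutoff (both my assumptions):

import numpy as np
import scipy.signal

x = np.random.randn(1000)                  # placeholder signal
b, a = scipy.signal.butter(2, 0.1, 'low')  # cutoff as a fraction of Nyquist
x_smooth = scipy.signal.filtfilt(b, a, x)  # forward-backward: no phase lag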
def concat_and_upload(estimates_path, gsc_estimates_path, sr=22050):
    for root, dirs, files in os.walk(estimates_path):
        if not files:
            continue
        files.sort()
        audio_data = np.concatenate(
            [librosa.core.load(os.path.join(root, name))[0] for name in files])
        librosa.output.write_wav(root + '.wav', audio_data, sr)
        for name in files:
            os.remove(os.path.join(root, name))
        os.rmdir(root)
        # upload_to_gcs([root + '.wav'], gsc_estimates_path)
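Note that librosa.output.write_wav was removed in librosa 0.8; on current versions the equivalent write goes through the soundfile package (a drop-in substitution, not the original code):

import soundfile as sf

# Replacement for librosa.output.write_wav on librosa >= 0.8.
sf.write(root + '.wav', audio_data, sr)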
video_fp = os.path.join(video_dir, '{}.mp4'.format(youtube_id))
# File paths
out_audio_fp = os.path.join(out_audio_dir, '{}.mp3'.format(youtube_id))
out_audio_feat_fp = os.path.join(out_audio_feat_dir,
                                 '{}.npy'.format(youtube_id))
if os.path.exists(out_audio_feat_fp):
    print('Done before: {}'.format(video_fp))
    return
# Extract audio
get_audio_from_video(video_fp, out_audio_fp, sr)
# Extract feature
try:
    duration = librosa.core.get_duration(filename=out_audio_fp)
    if duration < time_range[1]:
        print('Audio too short: {}'.format(video_fp))
        return
    sig, sr = librosa.core.load(out_audio_fp, sr=sr,
                                offset=time_range[0],
                                duration=time_range[1] - time_range[0])
    feat_ = librosa.feature.melspectrogram(sig, sr=sr,
                                           n_fft=win_size,
                                           hop_length=hop_size,
                                           n_mels=num_mels).T
    feat = np.log(1 + 10000 * feat_)
    np.save(out_audio_feat_fp, feat)
    print('Done: {} -- {}'.format(video_fp, youtube_id))
except Exception as e:
    print('Exception in extracting feature: {}. {}'.format(video_fp, e))
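The log(1 + 10000 * mel) compression above is a common dynamic-range normalization; as a standalone sketch with placeholder parameters (my values, not the project's):

import librosa
import numpy as np

# Minimal sketch: log-compressed mel spectrogram in [frames, mels] layout.
y, sr = librosa.load(librosa.example('trumpet'), sr=22050)
mel = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=1024,
                                     hop_length=512, n_mels=128).T
log_mel = np.log1p(10000 * mel)  # identical to np.log(1 + 10000 * mel)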
def preprocess_audio(data, config):
    amp_spectrum = librosa.core.stft(data, n_fft=config["n_fft"],
                                     hop_length=config["hop_length"], pad_mode='constant')
    print_data('amp_spectrum data', amp_spectrum)
    # np.abs(D[f, t]) is the magnitude of frequency bin f at frame t
    power_spectrum = np.abs(amp_spectrum) ** 2
    print_data('power spectrogram data', power_spectrum)
    # corresponding librosa operations:
    # to use pad_mode='constant' for the STFT, the mel spectrogram must be
    # computed manually as in this preprocessing script, since the default
    # pad_mode for stft is reflection padding
    # S, _ = librosa.spectrum._spectrogram(y=data, n_fft=config["n_fft"],
    #                                      hop_length=config["hop_length"], power=2)
    # print_data('power spectrogram generated through _spectrogram', S)
    mel_basis = librosa.filters.mel(sample_rate, n_fft=config["n_fft"],
                                    n_mels=config["n_mels"], fmin=config["fmin"],
                                    fmax=config["fmax"])
    print_data('mel_basis', mel_basis)
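The snippet stops after building mel_basis; applying the basis to the power spectrogram completes the manual mel computation the comments describe (a hypothetical continuation, not the original code):

    # Hypothetical continuation: project the power spectrogram onto the mel basis.
    mel_spectrum = mel_basis.dot(power_spectrum)  # shape: (n_mels, n_frames)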
def _create_tmp_mirror_file(self):
    out, err, returncode = run(
        'ffmpeg -y -i {0} -ac 2 -acodec pcm_f32le {1}'.format(
            self.audio_path, self.tmp_path))
    self.samples, self.sample_rate = lr.core.load(self.tmp_path,
                                                  sr=None, mono=False)
    logging.debug(out)
    logging.debug(err)
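The run() helper is not defined in the snippet; a plausible subprocess-based implementation consistent with its (out, err, returncode) return value might be (hypothetical):

import shlex
import subprocess

# Hypothetical run() helper matching the (out, err, returncode) usage above.
def run(command):
    proc = subprocess.Popen(shlex.split(command),
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return out, err, proc.returncode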
def _wav_to_cqt(wav_audio, hparams):
    """Transforms the contents of a wav file into a series of CQT frames."""
    y = audio_io.wav_data_to_samples(wav_audio, hparams.sample_rate)
    cqt = np.abs(
        librosa.core.cqt(
            y,
            hparams.sample_rate,
            hop_length=hparams.spec_hop_length,
            fmin=hparams.spec_fmin,
            n_bins=hparams.spec_n_bins,
            bins_per_octave=hparams.cqt_bins_per_octave),
        dtype=np.float32)
    # Transpose so that the data is in [frame, bins] format.
    cqt = cqt.T
    return cqt
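For reference, the same magnitude-CQT call on a raw waveform with typical transcription-style settings (placeholder values, not the project's hparams):

import librosa
import numpy as np

# Minimal sketch: magnitude CQT, transposed to [frame, bins].
y, sr = librosa.load(librosa.example('trumpet'), sr=16000)
cqt = np.abs(librosa.cqt(y, sr=sr, hop_length=512,
                         fmin=librosa.note_to_hz('C1'),
                         n_bins=336, bins_per_octave=48)).astype(np.float32).T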
def make_dataset(wsj0_path=WSJ0_PATH, dataset_save_path=DATASET_SAVE_PATH):
    dataset_fileName = dataset_save_path + '/wsj0_normalize_{}_{}.pic'.format(N_FFT, HOP_LENGTH)
    if os.path.isfile(dataset_fileName):
        print(dataset_fileName, " already exists. Skipping this phase.")
        return 0
    print("Start making dataset ...")
    dataset = []
    for fileName in progressbar(glob.glob(wsj0_path + "/*.wav")):
        wav, _ = snd.read(fileName)
        pwr_spec_FT = np.abs(librosa.core.stft(wav, n_fft=N_FFT, hop_length=HOP_LENGTH)) ** 2
        vad_result = vad(pwr_spec_FT)
        pwr_spec_FT /= (pwr_spec_FT.sum(axis=0)[vad_result]).mean()
        dataset.append(np.array(pwr_spec_FT, dtype=np.float32))
    dataset = np.hstack(dataset)
    print("Writing to pickle file ...")
    pic.dump(dataset, open(dataset_fileName, 'wb'), protocol=4)
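The vad() function is not shown; a simple energy-threshold stand-in consistent with its use as a boolean frame mask could look like this (hypothetical, my assumption):

import numpy as np

# Hypothetical energy-based VAD: True for frames whose energy exceeds a
# fraction of the maximum frame energy. The original vad() is not shown.
def vad(power_spec, threshold=0.01):
    frame_energy = power_spec.sum(axis=0)
    return frame_energy > threshold * frame_energy.max()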