Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def load_audio_file(filename, resize=False):
    """Decode an audio file and convert it to one channel, 2-byte samples at ``samplerate``.

    The decoder is chosen from the file extension, matched case-insensitively:
    mp3, wav, ogg, flac and 3gp (``.3g`` is treated as 3gp).

    Args:
        filename: Path of the audio file to load.
        resize: Unused by this function; kept so existing callers keep working.

    Returns:
        ``(sound, duration)`` where ``sound`` is the converted segment and
        ``duration`` is its ``duration_seconds`` value, or ``(None, None)``
        when the extension is not recognised or loading/conversion fails.
    """
    # Recognised suffix -> format name handed to AudioSegment.from_file.
    known_formats = (
        ('.mp3', 'mp3'),
        ('.wav', 'wav'),
        ('.ogg', 'ogg'),
        ('.flac', 'flac'),
        ('.3gp', '3gp'),
        ('.3g', '3gp'),
    )
    try:
        lowered = filename.lower()
        fmt = next(
            (name for suffix, name in known_formats if lowered.endswith(suffix)),
            None,
        )
        if fmt is None:
            # Reject unsupported extensions explicitly instead of relying on
            # an AttributeError raised by calling a method on None.
            print("Couldn't load file")
            return None, None
        sound = AudioSegment.from_file(filename, fmt)
        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except Exception:
        # Best-effort loader: report and signal failure to the caller, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("Couldn't load file")
        return None, None
    return sound, duration
def load_audio_file(filename, resize=False):
    """Decode an audio file and convert it to one channel, 2-byte samples at ``samplerate``.

    The decoder is chosen from the file extension, matched case-insensitively:
    mp3, wav, ogg, flac and 3gp (``.3g`` is treated as 3gp).

    Args:
        filename: Path of the audio file to load.
        resize: Unused by this function; kept so existing callers keep working.

    Returns:
        ``(sound, duration)`` where ``sound`` is the converted segment and
        ``duration`` is its ``duration_seconds`` value, or ``(None, None)``
        when the extension is not recognised or loading/conversion fails.
    """
    # Recognised suffix -> format name handed to AudioSegment.from_file.
    known_formats = (
        ('.mp3', 'mp3'),
        ('.wav', 'wav'),
        ('.ogg', 'ogg'),
        ('.flac', 'flac'),
        ('.3gp', '3gp'),
        ('.3g', '3gp'),
    )
    try:
        lowered = filename.lower()
        fmt = next(
            (name for suffix, name in known_formats if lowered.endswith(suffix)),
            None,
        )
        if fmt is None:
            # Reject unsupported extensions explicitly instead of relying on
            # an AttributeError raised by calling a method on None.
            print("Couldn't load file")
            return None, None
        sound = AudioSegment.from_file(filename, fmt)
        sound = sound.set_frame_rate(samplerate)
        sound = sound.set_channels(1)
        sound = sound.set_sample_width(2)
        duration = sound.duration_seconds
    except Exception:
        # Best-effort loader: report and signal failure to the caller, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("Couldn't load file")
        return None, None
    return sound, duration
# NOTE(review): the enclosing `def` line is not visible in this excerpt and the
# body stops before any return; `out_loc` and `call_file` are presumably
# parameters of that function -- confirm against the full source.
'''
Attempts to split a call file into different segments each time the speaker changes using
speaker diarization. This method assumes there are two speakers in the file (sales and customer)
and will cut out dial tones and any receptionists before the two speakers' conversation.
'''
# set output directories (created under out_loc when they do not exist yet)
no_rings_out_dir = os.path.join(out_loc, 'calls_no_ringtones')
if not os.path.exists(no_rings_out_dir):
os.makedirs(no_rings_out_dir)
diarized_out_dir = os.path.join(out_loc, 'calls_split_by_speaker')
if not os.path.exists(diarized_out_dir):
os.makedirs(diarized_out_dir)
# load in raw audio file (decoded as wav)
print(call_file)
raw_audio = AudioSegment.from_file(call_file, 'wav')
# base name of the call file without directory or extension
file_name = os.path.splitext(os.path.basename(call_file))[0]
# uses trained HMM to determine where the ringtones are and only use audio from after
# last detected ring and exports intermediate file
# the model file 'hmmRingDetect' is looked up next to this source file
curr_path = os.path.dirname(os.path.realpath(__file__))
ring_labels = aS.hmmSegmentation(call_file, os.path.join(curr_path, 'hmmRingDetect'), False)
segs, flags = aS.flags2segs(ring_labels[0], 1.0) # 1.0 is the mid-term window step from above model
# keep only the last segment; the *1000 presumably converts segment bounds in
# seconds to the millisecond offsets used for slicing -- TODO confirm
no_rings_audio = raw_audio[segs[-1, 0]*1000:segs[-1, 1]*1000]
temp_out_loc = os.path.join(no_rings_out_dir, file_name) + '.wav'
no_rings_audio.export(temp_out_loc, format='wav')
# split on speakers now setting num speakers to 2
diarized = aS.speakerDiarization(temp_out_loc, 2, mtSize=0.5, mtStep=0.1)
# determine which label was given to customer and salesperson
# takes the first diarization entry as the customer label; the reasoning for
# that choice is not visible in this excerpt
cust = diarized[0]
def read_and_convert_audio(file_path):
    """Create a GCS AudioSegment from the file_path.

    The file is decoded and then converted to one channel, a sample width of
    2 and the frame rate chosen by ``__sample_rate`` for the decoded audio.

    :param file_path: Path of the audio file to read.
    :returns: The converted AudioSegment.
    """
    audio_segment = AudioSegment.from_file(file_path)
    # The set_* methods return new segments instead of modifying in place, so
    # the result of the chain must be kept; previously it was discarded and
    # the unconverted audio was returned.
    converted = audio_segment \
        .set_channels(1) \
        .set_sample_width(2) \
        .set_frame_rate(__sample_rate(audio_segment))
    return converted
def from_file(path):
    """
    Load an audio file into an AudioSegment, using the file extension as the format.

    The extension is lower-cased and passed without its leading dot to
    ``pydub.AudioSegment.from_file``; a wrong extension will surface as
    whatever error the decoder raises.

    :param path: The path to the file, including the file extension.
    :returns: An AudioSegment instance wrapping the decoded audio and ``path``.
    """
    extension = os.path.splitext(path)[1]
    fmt = extension.lower()[1:]
    return AudioSegment(pydub.AudioSegment.from_file(path, fmt), path)
def convertMP3(title, ext):
    """Decode ``<title>.<ext>`` and export it as ``<title>.mp3``.

    :param title: File path without the extension.
    :param ext: Extension of the source file; also used as the input format.
    """
    source_path = title + '.' + ext
    target_path = title + '.mp3'
    AudioSegment.from_file(source_path, format=ext).export(target_path, format='mp3')
def toot(cls, ctx, acc, visibility=None, in_reply_to=None):
    """Post the incoming message for account ``acc``, as a reply when requested.

    Image, GIF, video, audio and voice messages are uploaded as media first
    (``.aac`` files are exported to ``.mp3`` beforehand) and posted together
    with ``ctx.text``. Other messages are posted only when ``ctx.text`` is
    non-empty; otherwise nothing is sent.

    :param ctx: Message context providing ``msg`` and ``text``.
    :param acc: Account handed to ``cls.get_session``.
    :param visibility: Forwarded unchanged to the status call.
    :param in_reply_to: When truthy, id of the status to reply to.
    """
    session = cls.get_session(acc)
    message = ctx.msg
    has_media = (
        message.is_image()
        or message.is_gif()
        or message.is_video()
        or message._view_type in (dc.const.DC_MSG_AUDIO, dc.const.DC_MSG_VOICE)
    )

    if has_media:
        upload_path = message.filename
        if upload_path.endswith('.aac'):
            # Export an mp3 copy next to the original and upload that instead.
            decoded = AudioSegment.from_file(upload_path, 'aac')
            upload_path = upload_path[:-4] + '.mp3'
            decoded.export(upload_path, format='mp3')
        options = {
            'media_ids': [session.media_post(upload_path).id],
            'visibility': visibility,
        }
    elif ctx.text:
        options = {'visibility': visibility}
    else:
        # Neither media nor text: nothing to post.
        return

    if in_reply_to:
        session.status_reply(session.status(in_reply_to), ctx.text, **options)
    else:
        session.status_post(ctx.text, **options)
def transcribe(lang: str = 'en', model: str = 'tdnn'):
    """
    Transcribe audio

    On a POST request the uploaded ``file`` is saved into the upload folder,
    split on silence, and every chunk is exported as a wav file and passed to
    ``inference.inference``.

    :param lang: Key into ``config.config`` selecting the language.
    :param model: Key into ``config.config[lang]`` selecting the model.
    :returns: A JSON error response when the upload cannot be read or when
        inference fails; no success response is visible in this part of the
        handler.
    """
    if request.method == "POST":
        try:
            f = request.files['file']
            filename = secure_filename(f.filename)
            wav_filename = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            f.save(wav_filename)
            complete_audio = AudioSegment.from_file(wav_filename)
            chunks = split_on_silence(complete_audio, silence_thresh=-26, min_silence_len=500, keep_silence=500)
            # Fall back to the whole recording when no silence split was found.
            chunks = chunks if len(chunks) > 0 else [complete_audio]
        except Exception:
            # Also reached when saving or decoding fails, not only when the
            # 'file' field is missing.
            return jsonify(status='error', description="Unable to find 'file'")
        try:
            # Loop-invariant: look the configuration up once, before any
            # chunk file is written.
            config_obj = config.config[lang][model]
            # str.strip(".wav") removes any of the characters '.', 'w', 'a',
            # 'v' from BOTH ends (e.g. "voice.wav" -> "oice"); drop only a
            # trailing ".wav" suffix instead.
            base_name = filename[:-4] if filename.endswith(".wav") else filename
            transcriptions = []
            for i, chunk in enumerate(chunks):
                chunk_filename = os.path.join(app.config['UPLOAD_FOLDER'], base_name+"chunk"+str(i)+".wav")
                chunk.export(chunk_filename, format="wav")
                config_obj["wav_filename"] = chunk_filename
                transcription = inference.inference(config_obj)
                transcriptions.append(transcription)
        except Exception:
            return jsonify(status='error', description="Wrong lang or model")
        # NOTE(review): nothing is returned after a successful run in the code
        # visible here; the rest of the handler appears to be outside this
        # view -- confirm before relying on the response.
def calculateAudioTrackSHA256_pyav(path):
    """Return the SHA256 of the audio samples read by ``audioSamplesFromAudioFile``.

    When ``config['enable_internal_checks']`` is set, the same input is also
    decoded with pydub and both the raw bytes and their checksum must match.

    :param path: File path or file-like object; objects with ``seek`` are
        rewound before the second decode.
    :returns: Tuple ``(sha256sum, data, properties)``.
    :raises Exception: If the pydub decode differs from the first decode.
    """
    samples, props = audioSamplesFromAudioFile(path)
    digest = calculateSHA256_data(samples)

    if not config['enable_internal_checks']:
        return digest, samples, props

    # Cross-check against a second decode of the same input.
    if hasattr(path, 'seek'):
        path.seek(0)
    segment = AudioSegment.from_file(path)
    pydub_digest = calculateSHA256_data(segment.raw_data)
    mismatch = segment.raw_data != samples or pydub_digest != digest
    if mismatch:
        raise Exception('SHA256sum IS DIFFERENT BETWEEN PYAV AND PYDUB')
    print('pyav/pydub decode check ' +
          TerminalColors.Ok + 'OK' + TerminalColors.ENDC)
    return digest, samples, props