Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
_cmvn_stats[0, :-1] = sum_feats[spk]
_cmvn_stats[1, :-1] = square_sum_feats[spk]
_cmvn_stats[0, -1] = counts[spk]
_cmvn_stats[1, -1] = 0.
# You can get the mean and std as follows:
# >>> N = _cmvn_stats[0, -1]
# >>> mean = _cmvn_stats[0, :-1] / N
# >>> std = np.sqrt(_cmvn_stats[1, :-1] / N - mean ** 2)
cmvn_stats[spk] = _cmvn_stats
# Per utterance or speaker CMVN
if is_wspecifier:
with file_writer_helper(args.wspecifier_or_wxfilename,
filetype=args.out_filetype) as writer:
for spk, mat in cmvn_stats.items():
writer[spk] = mat
# Global CMVN
else:
matrix = cmvn_stats[None]
if args.out_filetype == 'npy':
np.save(args.wspecifier_or_wxfilename, matrix)
elif args.out_filetype == 'mat':
# Kaldi supports only matrix or vector
kaldiio.save_mat(args.wspecifier_or_wxfilename, matrix)
else:
raise RuntimeError('Not supporting: --out-filetype {}'
.format(args.out_filetype))
# Entry point: parses the command line, configures logging, then reads
# (rate, array) pairs from args.rspecifier and passes each waveform to
# logmelspectrogram(). The call below is cut off in this excerpt, so what
# is done with `lmspc` and `writer` is not visible here.
def main():
parser = get_parser()
args = parser.parse_args()
logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
# INFO-level logging only when args.verbose is positive, WARN otherwise.
if args.verbose > 0:
logging.basicConfig(level=logging.INFO, format=logfmt)
else:
logging.basicConfig(level=logging.WARN, format=logfmt)
# Record how the script was invoked (only emitted at INFO level).
logging.info(get_commandline_args())
# Reader and writer are opened in one `with`, so both are released together.
with kaldiio.ReadHelper(args.rspecifier,
segments=args.segments) as reader, \
file_writer_helper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method
) as writer:
for utt_id, (rate, array) in reader:
# NOTE(review): `assert` is removed under `python -O`, so a sampling-rate
# mismatch would then go unnoticed; consider raising an explicit error.
assert rate == args.fs
array = array.astype(numpy.float32)
# Divide by 2 ** (args.normalize - 1); skipped when normalize is None or 1.
# Presumably args.normalize is the PCM bit depth - TODO confirm.
if args.normalize is not None and args.normalize != 1:
array = array / (1 << (args.normalize - 1))
lmspc = logmelspectrogram(
x=array,
fs=args.fs,
n_mels=args.n_mels,
n_fft=args.n_fft,
model.cuda()
# read json data
with open(args.recog_json, 'rb') as f:
js = json.load(f)['utts']
load_inputs_and_targets = LoadInputsAndTargets(
mode='asr', load_output=False, sort_in_input_length=False,
preprocess_conf=None # Apply pre_process in outer func
)
if args.batchsize == 0:
args.batchsize = 1
# Creates writers for outputs from the network
if args.enh_wspecifier is not None:
enh_writer = file_writer_helper(args.enh_wspecifier,
filetype=args.enh_filetype)
else:
enh_writer = None
# Creates a Transformation instance.
# A preprocess_conf supplied through `args` takes precedence; when it is
# None the value carried by `train_args` is used instead.
preprocess_conf = (
    train_args.preprocess_conf if args.preprocess_conf is None
    else args.preprocess_conf)
if preprocess_conf is not None:
    # The '{}' placeholder is required: without it str.format() silently
    # discards its argument and the selected config never reaches the log.
    logging.info('Use preprocessing: {}'.format(preprocess_conf))
    transform = Transformation(preprocess_conf)
else:
    # Neither source provides a config, so no transformation is applied.
    transform = None
# Creates an IStft instance
istft = None
# Entry point: parses the command line, configures logging, then reads
# (rate, array) pairs from args.rspecifier, resamples them to args.fs when
# needed and passes each waveform to logmelspectrogram(). The call below is
# cut off in this excerpt, so the use of `lmspc` and `writer` is not visible.
def main():
parser = get_parser()
args = parser.parse_args()
logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
# INFO-level logging only when args.verbose is positive, WARN otherwise.
if args.verbose > 0:
logging.basicConfig(level=logging.INFO, format=logfmt)
else:
logging.basicConfig(level=logging.WARN, format=logfmt)
# Record how the script was invoked (only emitted at INFO level).
logging.info(get_commandline_args())
# Reader and writer are opened in one `with`, so both are released together.
with kaldiio.ReadHelper(args.rspecifier,
segments=args.segments) as reader, \
file_writer_helper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method
) as writer:
for utt_id, (rate, array) in reader:
# Convert to float32 before any resampling or scaling.
array = array.astype(numpy.float32)
# Resample only when the input rate differs from the requested args.fs.
# NOTE(review): the rates are passed positionally; recent librosa releases
# appear to accept orig_sr/target_sr only as keywords - confirm against the
# librosa version this project pins.
if rate != args.fs:
array = librosa.resample(array, rate, args.fs)
# Divide by 2 ** (args.normalize - 1); skipped when normalize is None or 1.
# Presumably args.normalize is the PCM bit depth - TODO confirm.
if args.normalize is not None and args.normalize != 1:
array = array / (1 << (args.normalize - 1))
lmspc = logmelspectrogram(
x=array,
fs=args.fs,
# Fragment of a script body: configures logging, optionally builds a
# Transformation from args.preprocess_conf, then iterates (rate, array)
# pairs read from args.rspecifier while a writer is open. The enclosing
# definition and the rest of the loop body are not visible in this excerpt.
args = parser.parse_args()
logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
# INFO-level logging only when args.verbose is positive, WARN otherwise.
if args.verbose > 0:
logging.basicConfig(level=logging.INFO, format=logfmt)
else:
logging.basicConfig(level=logging.WARN, format=logfmt)
# Record how the script was invoked (only emitted at INFO level).
logging.info(get_commandline_args())
# `preprocessing` stays None when no config was given.
if args.preprocess_conf is not None:
preprocessing = Transformation(args.preprocess_conf)
logging.info('Apply preprocessing: {}'.format(preprocessing))
else:
preprocessing = None
with file_writer_helper(args.wspecifier,
filetype=args.filetype,
write_num_frames=args.write_num_frames,
compress=args.compress,
compression_method=args.compression_method,
pcm_format=args.format
) as writer:
# NOTE(review): unlike the other readers in this file, this ReadHelper is
# iterated directly instead of being opened in a `with` statement, so
# nothing here closes it explicitly - confirm whether that is intentional.
for utt_id, (rate, array) in kaldiio.ReadHelper(args.rspecifier,
args.segments):
if args.filetype == 'mat':
# Kaldi-matrix doesn't support integer
array = array.astype(numpy.float32)
if array.ndim == 1:
# (Time) -> (Time, Channel)
array = array[:, None]
def main():
    """Copy every (utterance, matrix) pair of an ark through a writer.

    Entries are obtained with ``kaldiio.load_ark`` from ``args.rspecifier``
    and stored one by one via ``file_writer_helper`` at ``args.wspecifier``,
    always using the ``'mat'`` filetype.
    """
    args = get_parser().parse_args()
    entries = kaldiio.load_ark(args.rspecifier)

    # Everything except the fixed filetype comes from the parsed arguments.
    writer_options = {
        'filetype': 'mat',
        'write_num_frames': args.write_num_frames,
        'compress': args.compress,
        'compression_method': args.compression_method,
    }
    with file_writer_helper(args.wspecifier, **writer_options) as sink:
        for key, matrix in entries:
            sink[key] = matrix
else:
is_rspcifier = False
if args.stats_filetype == 'mat':
stats = kaldiio.load_mat(args.stats_rspecifier_or_rxfilename)
else:
stats = numpy.load(args.stats_rspecifier_or_rxfilename)
stats_dict = {None: stats}
# Build the normalizer from the collected statistics and the CLI switches.
cmvn = CMVN(
    stats=stats_dict,
    norm_means=args.norm_means,
    norm_vars=args.norm_vars,
    utt2spk=args.utt2spk,
    spk2utt=args.spk2utt,
    reverse=args.reverse,
)

# Options forwarded to the output writer.
writer_kwargs = {
    'filetype': args.out_filetype,
    'write_num_frames': args.write_num_frames,
    'compress': args.compress,
    'compression_method': args.compression_method,
}
with file_writer_helper(args.wspecifier, **writer_kwargs) as writer:
    for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
        if is_scipy_wav_style(mat):
            # Sound files are delivered as a (rate, ndarray) pair;
            # only the samples are normalized.
            rate, mat = mat
        # Statistics are looked up by utterance id only when they were
        # read from an rspecifier; otherwise the single None entry is used.
        stats_key = utt if is_rspcifier else None
        mat = cmvn(mat, stats_key)
        writer[utt] = mat