Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""Create the transformer instances needed to process the given dtypes.
Args:
dtypes (dict):
mapping of field names and dtypes.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformer_templates = {
'i': rdt.transformers.NumericalTransformer(dtype=int),
'f': rdt.transformers.NumericalTransformer(dtype=float),
'O': rdt.transformers.CategoricalTransformer,
'b': rdt.transformers.BooleanTransformer,
'M': rdt.transformers.DatetimeTransformer,
}
transformer_templates.update(self._transformer_templates)
transformers = dict()
for name, dtype in dtypes.items():
transformer_template = transformer_templates[np.dtype(dtype).kind]
if isinstance(transformer_template, type):
transformer = transformer_template()
else:
transformer = copy.deepcopy(transformer_template)
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers[name] = transformer
return transformers
mapping of field names and transformer instances.
"""
transformers_dict = dict()
for name, dtype in dtypes.items():
dtype = np.dtype(dtype)
if dtype.kind == 'i':
transformer = transformers.NumericalTransformer(dtype=int)
elif dtype.kind == 'f':
transformer = transformers.NumericalTransformer(dtype=float)
elif dtype.kind == 'O':
anonymize = pii_fields.get(name)
transformer = transformers.CategoricalTransformer(anonymize=anonymize)
elif dtype.kind == 'b':
transformer = transformers.BooleanTransformer()
elif dtype.kind == 'M':
transformer = transformers.DatetimeTransformer()
else:
raise ValueError('Unsupported dtype: {}'.format(dtype))
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers_dict[name] = transformer
return transformers_dict