Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
config_obj : configuration_parser.Configuration
A new configuration object.
data_container : container.DataContainer
A new data container object.
Raises
------
ValueError
If 'human_score_column' and 'system_score_column' have the same value.
"""
# get the directory where the config file lives
# if this is the 'expm' directory, then go
# up one level.
configpath = config_obj.configdir
pred_file_location = DataReader.locate_files(config_obj['predictions_file'],
configpath)
# get the column name for the labels for the training and testing data
human_score_column = config_obj['human_score_column']
system_score_column = config_obj['system_score_column']
# if the human score column is the same as the
# system score column, raise an error
if human_score_column == system_score_column:
raise ValueError("'human_score_column' and "
"'system_score_column' "
"cannot have the same value.")
# get the name of the optional column that
# contains the second human score
second_human_score_column = config_obj['second_human_score_column']
Returns
-------
jsons : list
A list of paths to all configuration JSON files contained in the output directory.
Raises
------
FileNotFoundError
If the directory does not exist or does not contain an output
of an RSMTool experiment.
ValueError
If the given experiment directory contains several JSON configuration
files instead of just one.
"""
full_path_experiment_dir = DataReader.locate_files(experiment_dir, configpath)
if not full_path_experiment_dir:
raise FileNotFoundError("The directory {} "
"does not exist.".format(experiment_dir))
else:
# check that there is an output directory
csvdir = normpath(join(full_path_experiment_dir, 'output'))
if not exists(csvdir):
raise FileNotFoundError("The directory {} does not contain "
"the output of an rsmtool "
"experiment.".format(full_path_experiment_dir))
# find the json configuration files for all experiments stored in this directory
jsons = glob.glob(join(csvdir, '*.json'))
if len(jsons) == 0:
raise FileNotFoundError("The directory {} does not contain "
"the .json configuration files for rsmtool "
logger = logging.getLogger(__name__)
configuration = configure('rsmpredict', config_file_or_obj_or_dict)
# get the experiment ID
experiment_id = configuration['experiment_id']
# Get output format
file_format = configuration.get('file_format', 'csv')
# Get DataWriter object
writer = DataWriter(experiment_id)
# get the input file containing the feature values
# for which we want to generate the predictions
input_features_file = DataReader.locate_files(configuration['input_features_file'],
configuration.configdir)
if not input_features_file:
raise FileNotFoundError('Input file {} does not exist'
''.format(configuration['input_features_file']))
experiment_dir = DataReader.locate_files(configuration['experiment_dir'],
configuration.configdir)
if not experiment_dir:
raise FileNotFoundError('The directory {} does not exist.'
''.format(configuration['experiment_dir']))
else:
experiment_output_dir = normpath(join(experiment_dir, 'output'))
if not exists(experiment_output_dir):
raise FileNotFoundError('The directory {} does not contain '
'the output of an rsmtool experiment.'.format(experiment_dir))
Returns
-------
custom_report_sections : list of str
List of absolute paths to the custom section
notebooks.
Raises
------
FileNotFoundError
If any of the files cannot be found.
"""
custom_report_sections = []
for cs_path in custom_report_section_paths:
cs_location = DataReader.locate_files(cs_path, configdir)
if not cs_location:
raise FileNotFoundError("Error: custom section not found at "
"{}.".format(cs_path))
else:
custom_report_sections.append(cs_location)
return custom_report_sections
A DataContainer object.
Raises
------
ValueError
If the columns in the config file do not exist in the data.
"""
train = data_container_obj.train
test = data_container_obj.test
feature_specs = data_container_obj.get_frame('feature_specs')
feature_subset = data_container_obj.get_frame('feature_subset_specs')
configdir = config_obj.configdir
(test_file_location,
train_file_location) = DataReader.locate_files([config_obj['test_file'],
config_obj['train_file']],
configdir)
feature_subset_file = config_obj['feature_subset_file']
if feature_subset_file is not None:
feature_subset_file = DataReader.locate_files(feature_subset_file, configdir)
# get the column name for the labels for the training and testing data
train_label_column = config_obj['train_label_column']
test_label_column = config_obj['test_label_column']
# get the column name that will hold the ID for
# both the training and the test data
id_column = config_obj['id_column']