Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _build_train_pool(X, y, cat_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description):
    """
    Return the catboost.Pool that training should run on.

    Parameters
    ----------
    X : catboost.Pool or STRING_TYPES instance or any other data accepted by Pool
        * catboost.Pool - returned unchanged after validation; pairs and
          column_description are not consulted in this case.
        * STRING_TYPES instance - handed to Pool as ``data`` together with
          ``pairs`` and ``column_description`` (presumably a path to a
          dataset file - confirm against Pool); y and the remaining
          arguments are not consulted.
        * anything else - wrapped in a new Pool together with y and all
          per-object arguments.
    y : labels
        Must be None when X is a Pool, and must not be None when X is raw data.
    cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline
        Must all be None when X is a Pool; otherwise forwarded to Pool.
    pairs
        Forwarded to Pool when a new Pool is built.
    column_description
        Forwarded to Pool only when X is a STRING_TYPES instance.

    Returns
    -------
    catboost.Pool

    Raises
    ------
    CatboostError
        If X is a Pool and any Pool-level argument or y is also given, if
        that Pool has neither a label nor pairs, or if X is raw data and
        y is None.
    """
    if isinstance(X, Pool):
        # Everything below is already stored inside a Pool, so passing it
        # next to a ready-made Pool is rejected.
        pool_level_args = (cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline)
        if not all(arg is None for arg in pool_level_args):
            raise CatboostError("cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline should have the None type when X has catboost.Pool type.")
        # The pair count is only queried when the label is missing.
        if X.get_label() is None:
            if X.num_pairs() == 0:
                raise CatboostError("Label in X has not initialized.")
        if y is not None:
            raise CatboostError("Wrong initializing y: X is catboost.Pool object, y must be initialized inside catboost.Pool.")
        return X

    if isinstance(X, STRING_TYPES):
        return Pool(data=X, pairs=pairs, column_description=column_description)

    if y is None:
        raise CatboostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
    return Pool(
        X,
        y,
        cat_features=cat_features,
        pairs=pairs,
        weight=sample_weight,
        group_id=group_id,
        group_weight=group_weight,
        subgroup_id=subgroup_id,
        pairs_weight=pairs_weight,
        baseline=baseline,
    )
def _build_train_pool(X, y, cat_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description):
    """
    Return the catboost.Pool that training should run on.

    Parameters
    ----------
    X : catboost.Pool or STRING_TYPES instance or any other data accepted by Pool
        * catboost.Pool - returned unchanged after validation; pairs and
          column_description are not consulted in this case.
        * STRING_TYPES instance - handed to Pool as ``data`` together with
          ``pairs`` and ``column_description`` (presumably a path to a
          dataset file - confirm against Pool); y and the remaining
          arguments are not consulted.
        * anything else - wrapped in a new Pool together with y and all
          per-object arguments.
    y : labels
        Must be None when X is a Pool, and must not be None when X is raw data.
    cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline
        Must all be None when X is a Pool; otherwise forwarded to Pool.
    pairs
        Forwarded to Pool when a new Pool is built.
    column_description
        Forwarded to Pool only when X is a STRING_TYPES instance.

    Returns
    -------
    catboost.Pool

    Raises
    ------
    CatboostError
        If X is a Pool and any Pool-level argument or y is also given, if
        that Pool has neither a label nor pairs, or if X is raw data and
        y is None.
    """
    if isinstance(X, Pool):
        # Everything below is already stored inside a Pool, so passing it
        # next to a ready-made Pool is rejected.
        pool_level_args = (cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline)
        if not all(arg is None for arg in pool_level_args):
            raise CatboostError("cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline should have the None type when X has catboost.Pool type.")
        # The pair count is only queried when the label is missing.
        if X.get_label() is None:
            if X.num_pairs() == 0:
                raise CatboostError("Label in X has not initialized.")
        if y is not None:
            raise CatboostError("Wrong initializing y: X is catboost.Pool object, y must be initialized inside catboost.Pool.")
        return X

    if isinstance(X, STRING_TYPES):
        return Pool(data=X, pairs=pairs, column_description=column_description)

    if y is None:
        raise CatboostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
    return Pool(
        X,
        y,
        cat_features=cat_features,
        pairs=pairs,
        weight=sample_weight,
        group_id=group_id,
        group_weight=group_weight,
        subgroup_id=subgroup_id,
        pairs_weight=pairs_weight,
        baseline=baseline,
    )
# Validate the model state and the arguments, wrap `data` in a Pool when it is
# not one already, then delegate to _base_predict_multi (MultiClass /
# MultiClassOneVsAll losses) or _base_predict (everything else).
# Raises CatboostError when the model is not fitted or prediction_type is
# not one of 'Class', 'RawFormulaVal', 'Probability'.
# NOTE(review): indentation is missing in this listing and no `return` follows
# the final 'Probability' branch - the function appears to be cut off before
# it returns `predictions`; confirm against the full source.
def _predict(self, data, prediction_type, ntree_start, ntree_end, thread_count, verbose):
# A falsy `verbose` argument (None, False, 0) is replaced by the model's
# 'verbose' parameter; if that is None as well, logging is switched off.
verbose = verbose or self.get_param('verbose')
if verbose is None:
verbose = False
if not self.is_fitted():
raise CatboostError("There is no trained model to use predict(). Use fit() to train model. Then use predict().")
# Non-Pool input is wrapped in a Pool. The model's categorical feature
# indices are passed along unless the input is a FeaturesData instance,
# in which case cat_features is None.
if not isinstance(data, Pool):
data = Pool(
data=data,
cat_features=self._get_cat_feature_indices() if not isinstance(data, FeaturesData) else None
)
if not isinstance(prediction_type, STRING_TYPES):
raise CatboostError("Invalid prediction_type type={}: must be str().".format(type(prediction_type)))
if prediction_type not in ('Class', 'RawFormulaVal', 'Probability'):
raise CatboostError("Invalid value of prediction_type={}: must be Class, RawFormulaVal or Probability.".format(prediction_type))
# The loss is read from 'loss_function', falling back to 'objective'.
loss_function_type = self.get_param('loss_function')
if loss_function_type is None:
loss_function_type = self.get_param('objective')
# TODO(kirillovs): very bad solution. user should be able to use custom multiclass losses
# Multiclass losses take a separate prediction path whose result is
# transposed before being returned.
if loss_function_type is not None and (loss_function_type == 'MultiClass' or loss_function_type == 'MultiClassOneVsAll'):
return np.transpose(self._base_predict_multi(data, prediction_type, ntree_start, ntree_end, thread_count, verbose))
predictions = np.array(self._base_predict(data, prediction_type, ntree_start, ntree_end, thread_count, verbose))
# For 'Probability', every value p is paired with its complement 1 - p
# (assuming `predictions` is 1-D this gives one [1 - p, p] row per object
# - TODO confirm).
if prediction_type == 'Probability':
predictions = np.transpose([1 - predictions, predictions])
export_parameters : dict
Parameters for CoreML export:
* prediction_type : string - either 'probability' or 'raw'
* coreml_description : string
* coreml_model_version : string
* coreml_model_author : string
* coreml_model_license : string
pool : catboost.Pool or list or numpy.array or pandas.DataFrame or pandas.Series or catboost.FeaturesData
Training pool.
"""
# NOTE(review): the enclosing `def` line is not part of this listing; the
# names self, fname, format, export_parameters and pool come from it.
# Saving is refused for a model that has not been trained.
if not self.is_fitted():
raise CatboostError("There is no trained model to use save_model(). Use fit() to train model. Then use save_model().")
# fname has to be an instance of STRING_TYPES.
if not isinstance(fname, STRING_TYPES):
raise CatboostError("Invalid fname type={}: must be str().".format(type(fname)))
# A `pool` that is given but is not a Pool is wrapped in one. The model's
# categorical feature indices are passed along unless `pool` is a
# FeaturesData instance, in which case cat_features is None.
if pool is not None and not isinstance(pool, Pool):
pool = Pool(
data=pool,
cat_features=self._get_cat_feature_indices() if not isinstance(pool, FeaturesData) else None
)
# The actual serialization is delegated to _save_model.
self._save_model(fname, format, export_parameters, pool)
# NOTE(review): this is the interior of a function whose `def` line and tail
# are not part of this listing; train_pool, X, eval_set and
# column_description are defined before this point.
# An empty training pool is rejected up front.
if train_pool.is_empty_:
raise CatboostError("X is empty.")
# True only when the caller did not pass X as a ready-made Pool
# (presumably because only an internally built Pool may be cleared - confirm
# against the code that consumes this flag).
allow_clear_pool = not isinstance(X, Pool)
# A single eval_set is treated as a one-element list so that both forms go
# through the same loop.
eval_set_list = eval_set if isinstance(eval_set, list) else [eval_set]
eval_sets = []
eval_total_row_count = 0
# Each entry may be a Pool, a STRING_TYPES instance, an (X, y) tuple or None.
# Every accepted entry ends up in eval_sets as a Pool, its row count is added
# to eval_total_row_count, and an entry with zero rows raises CatboostError.
# Note that the loop variable rebinds the name `eval_set`.
for eval_set in eval_set_list:
if isinstance(eval_set, Pool):
eval_sets.append(eval_set)
eval_total_row_count += eval_sets[-1].num_row()
if eval_sets[-1].num_row() == 0:
raise CatboostError("Empty 'eval_set' in Pool")
elif isinstance(eval_set, STRING_TYPES):
# The string is handed to Pool together with column_description
# (presumably a path to a dataset file - confirm against Pool).
eval_sets.append(Pool(eval_set, column_description=column_description))
eval_total_row_count += eval_sets[-1].num_row()
if eval_sets[-1].num_row() == 0:
raise CatboostError("Empty 'eval_set' in file {}".format(eval_set))
elif isinstance(eval_set, tuple):
# Only 2-tuples are accepted; the new Pool reuses the training pool's
# categorical feature indices.
if len(eval_set) != 2:
raise CatboostError("Invalid shape of 'eval_set': {}, must be (X, y).".format(str(tuple(type(_) for _ in eval_set))))
eval_sets.append(Pool(eval_set[0], eval_set[1], cat_features=train_pool.get_cat_feature_indices()))
eval_total_row_count += eval_sets[-1].num_row()
if eval_sets[-1].num_row() == 0:
raise CatboostError("Empty 'eval_set' in tuple")
elif eval_set is None:
# None is tolerated only as the sole entry; nothing is appended for it.
if len(eval_set_list) > 1:
raise CatboostError("Multiple eval set shall not contain None")
else:
raise CatboostError("Invalid type of 'eval_set': {}, while expected Pool or (X, y) or filename, or list thereof.".format(type(eval_set)))
Values are calculated for RawFormulaVal predictions.
- Interaction
list of length [n_features] of 3-element lists of (first_feature_index, second_feature_index, interaction_score (float))
"""
# NOTE(review): this is the interior of a method whose `def` line is not part
# of this listing and whose body continues past the last visible line; the
# block name is a guess. verbose, fstr_type, data, thread_count and
# prettified are defined before this point.
# verbose must be a bool or an int; it is converted to int and must not be
# negative.
if not isinstance(verbose, bool) and not isinstance(verbose, int):
raise CatboostError('verbose should be bool or int.')
verbose = int(verbose)
if verbose < 0:
raise CatboostError('verbose should be non-negative.')
# fstr_type may arrive as an EFstrType member or as a string; it is normalized
# through enum_from_enum_or_str.
fstr_type = enum_from_enum_or_str(EFstrType, fstr_type)
# Data may be omitted or empty for PredictionValuesChange when the model
# reports leaf weights, and for Interaction.
empty_data_is_ok = (((fstr_type == EFstrType.PredictionValuesChange) and self._object._has_leaf_weights_in_model())
or (fstr_type == EFstrType.Interaction))
# Otherwise `data` has to be a Pool and must not be empty.
# NOTE(review): the first message below says "metric type" although the value
# being reported is the type of `data` - confirm the intended wording.
if not empty_data_is_ok:
if not isinstance(data, Pool):
raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
if data.is_empty_:
raise CatboostError("data is empty.")
# The computation is delegated to _calc_fstr, run inside log_fixup().
with log_fixup():
fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
# For the two "change" importance types only the first element of every fstr
# row is kept. With `prettified`, (feature_name, importance) pairs are
# returned sorted by importance in descending order; otherwise the plain
# list of importances is returned.
if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
feature_importances = [value[0] for value in fstr]
if prettified:
return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
else:
return feature_importances
# For ShapValues, when the innermost items are themselves ARRAY_TYPES
# instances, the nested sequence is rebuilt as np.array objects at all three
# levels (doc -> dimension -> value).
if fstr_type == EFstrType.ShapValues:
if isinstance(fstr[0][0], ARRAY_TYPES):
return np.array([np.array([np.array([
value for value in dimension]) for dimension in doc]) for doc in fstr])