def test_isolation_forest():
    import shap
    import numpy as np
    from sklearn.ensemble import IsolationForest
    from sklearn.ensemble.iforest import _average_path_length

    X, y = shap.datasets.boston()
    iso = IsolationForest(behaviour='new', contamination='auto')
    iso.fit(X)

    explainer = shap.TreeExplainer(iso)
    shap_values = explainer.shap_values(X)
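    # Local accuracy check: the SHAP values plus expected_value sum to the
    # ensemble's average path length, which score_samples maps through
    # -2**(-path_length / c(max_samples)), so sklearn's anomaly score can be
    # reconstructed directly from the explanation.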
    score_from_shap = -2 ** (
        -(np.sum(shap_values, axis=1) + explainer.expected_value) /
        _average_path_length(np.array([iso.max_samples_]))[0]
    )
    assert np.allclose(iso.score_samples(X), score_from_shap, atol=1e-7)
# (continues from the setup above: assumes numpy/shap/xgboost are imported
# and X, y are defined)
max_depth = 6

# train a model with a single tree
Xd = xgboost.DMatrix(X, label=y)
model = xgboost.train({'eta': 1,
                       'max_depth': max_depth,
                       'base_score': 0,
                       'lambda': 0},
                      Xd, 1)
ypred = model.predict(Xd)

# compare Tree SHAP (interventional) with Kernel SHAP for five random samples
for i in range(5):
    x_ind = np.random.choice(X.shape[0])  # pick a random row, not a column
    x = X[x_ind:x_ind + 1, :]

    expl = shap.TreeExplainer(model, X, feature_perturbation="interventional")
    f = lambda inp: model.predict(xgboost.DMatrix(inp))
    expl_kern = shap.KernelExplainer(f, X)

    itshap = expl.shap_values(x)
    kshap = expl_kern.shap_values(x, nsamples=150)
    assert np.allclose(itshap, kshap), \
        "Kernel SHAP doesn't match Independent Tree SHAP!"
    assert np.allclose(itshap.sum() + expl.expected_value, ypred[x_ind]), \
        "SHAP values don't sum to model output!"
problem_bounds = [(-1e6, 3e6), (-1e6, 3e6)]

# Don't worry about "objective has been evaluated" warnings.
result_et = skopt.forest_minimize(objective_function, problem_bounds, n_calls=100, base_estimator="ET")
result_rf = skopt.forest_minimize(objective_function, problem_bounds, n_calls=100, base_estimator="RF")

# Explain the extra-trees surrogate model's predictions.
et_df = pd.DataFrame(result_et.x_iters, columns=["X0", "X1"])
explainer_et = shap.TreeExplainer(result_et.models[-1], et_df)
shap_values_et = explainer_et.shap_values(et_df)

# Explain the random-forest surrogate model's predictions.
rf_df = pd.DataFrame(result_rf.x_iters, columns=["X0", "X1"])
explainer_rf = shap.TreeExplainer(result_rf.models[-1], rf_df)
shap_values_rf = explainer_rf.shap_values(rf_df)

# Local accuracy: SHAP values plus the expected value equal the prediction.
assert np.allclose(shap_values_et.sum(1) + explainer_et.expected_value, result_et.models[-1].predict(et_df))
assert np.allclose(shap_values_rf.sum(1) + explainer_rf.expected_value, result_rf.models[-1].predict(rf_df))
# (X and b come from the surrounding test; y is a linear function of X)
y = np.matmul(X, b)
max_depth = 6

# train a model with 20 boosting rounds
Xd = xgboost.DMatrix(X, label=y)
model = xgboost.train({'eta': 1,
                       'max_depth': max_depth,
                       'base_score': 0,
                       'lambda': 0},
                      Xd, 20)
ypred = model.predict(Xd)

# Compare for five random samples
for i in range(5):
    x_ind = np.random.choice(X.shape[0])  # random row index
    x = X[x_ind:x_ind + 1, :]
    expl = shap.TreeExplainer(model, X, feature_perturbation="interventional")
    itshap = expl.shap_values(x)
    assert np.allclose(itshap.sum() + expl.expected_value, ypred[x_ind]), \
        "SHAP values don't sum to model output!"
def _explain_trees(  # enclosing signature reconstructed from the fragment
    model,
    transformed_data: Table,
    transformed_reference_data: Table,
    progress_callback: Callable,
) -> Tuple[
    Optional[List[np.ndarray]], Optional[np.ndarray], Optional[np.ndarray]
]:
    """
    Compute and return SHAP values for learners that are explained by
    TreeExplainer: all scikit-learn models based on trees. If explanation
    with TreeExplainer is not possible, return None.
    """
    if sparse.issparse(transformed_data.X):
        # sparse data is not supported by TreeExplainer; KernelExplainer can handle it
        return None, None, None
    try:
        explainer = TreeExplainer(
            model.skl_model, data=sample(transformed_reference_data.X, 100),
        )
    except (SHAPError, AttributeError):
        return None, None, None

    # TreeExplainer cannot explain more than ~1000 cases in reasonable time
    data_sample, sample_mask = _subsample_data(transformed_data, 1000)
    num_classes = (
        len(transformed_data.domain.class_var.values)
        if transformed_data.domain.class_var.is_discrete
        else None
    )

    # process in batches: explaining one attribute at a time roughly doubles
    # the processing time compared to a batch size of 10
    shap_values = []
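    # A minimal sketch of how the batched loop could proceed (the original
    # body is truncated at this point; batch size 10 follows the comment
    # above, and the progress_callback argument is an assumption):
    for i in range(0, len(data_sample), 10):
        batch = data_sample.X[i:i + 10]
        shap_values.append(explainer.shap_values(batch))
        progress_callback(i / len(data_sample))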
import lightgbm as lgb
import shap

# (fragment of an explainer-wrapper class; the __init__ signature below is
# inferred from the attribute assignments)
def __init__(self, model, model_name, x_train, x_test, y_test, learner):
    self.model = model
    self.model_name = model_name
    self.x_train = x_train
    self.x_test = x_test
    self.y_test = y_test

    # pick the SHAP explainer that matches the learner type
    if learner == "linear":
        self.explainer = shap.LinearExplainer(
            self.model, self.x_train, feature_dependence="independent"
        )
    elif learner == "tree":
        self.explainer = shap.TreeExplainer(self.model)
    elif learner == "kernel":
        # KernelExplainer is model-agnostic; prefer class probabilities when available
        if hasattr(self.model, "predict_proba"):
            func = self.model.predict_proba
        else:
            func = self.model.predict
        self.explainer = shap.KernelExplainer(func, self.x_train)
    else:
        raise ValueError(f"Learner: {learner} is not supported yet.")

    self.expected_value = self.explainer.expected_value
    self.shap_values = np.array(self.explainer.shap_values(self.x_test)).astype(
        float
    )
    if isinstance(self.model, lgb.sklearn.LGBMClassifier) and isinstance(
def get_shap_values(self):
    """
    Calculate Shapley values for each treatment group.
    """
    shap_dict = {}
    for group, mod in self.models_tau.items():
        explainer = shap.TreeExplainer(mod)
        if self.r_learners is not None:
            # hacky way of running shap without error
            explainer.model.original_model.params['objective'] = None
        shap_values = explainer.shap_values(self.X)
        shap_dict[group] = shap_values
    return shap_dict
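# A hedged usage sketch for get_shap_values (the `learner` instance and its
# training data are assumptions, not shown in the snippet above):
#
#     shap_dict = learner.get_shap_values()
#     for group, values in shap_dict.items():
#         print(group, values.shape)  # one SHAP matrix per treatment group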
def explain(shap_exp: Explanation, training_df, test_df, explanation_target):
    job = shap_exp.job
    model = joblib.load(job.predictive_model.model_path)
    model = model[0]

    shap.initjs()
    explainer = shap.TreeExplainer(model)
    merged_df = pd.concat([training_df, test_df])
    shap_values = explainer.shap_values(merged_df.drop(['trace_id', 'label'], axis=1))

    # decode the encoded features back to readable values
    encoder = retrieve_proper_encoder(job)
    encoder.decode(merged_df, job.encoding)
    encoder.decode(test_df, job.encoding)

    # index of the target trace inside the merged frame (training rows come first)
    explanation_target_int = merged_df[merged_df['trace_id'] == explanation_target].index.item() + \
        training_df.drop(['trace_id', 'label'], axis=1).shape[0]
    explanation_target_vector = test_df[test_df['trace_id'] == explanation_target].drop(['trace_id', 'label'], axis=1)

    expected_value = explainer.expected_value[0] if len(explainer.expected_value) > 1 else explainer.expected_value
    shap_value = shap_values[explanation_target_int, :] if hasattr(shap_values, "size") else \
        shap_values[0][explanation_target_int, :]

    name = create_unique_name("temporal_shap.svg")
    shap.force_plot(expected_value, shap_value, explanation_target_vector,
                    show=False, matplotlib=True).savefig(name)
def handle(self, *args, **kwargs):
    TARGET_MODEL = 68
    job = Job.objects.filter(pk=TARGET_MODEL)[0]
    model = joblib.load(job.predictive_model.model_path)
    model = model[0]
    training_df, test_df = get_encoded_logs(job)

    EXPLANATION_TARGET = 2_3300
    FEATURE_TARGET = 1

    shap.initjs()
    explainer = shap.TreeExplainer(model)
    training_df = training_df.drop(['trace_id', 'label'], axis=1)
    shap_values = explainer.shap_values(training_df)

    # decode features for a readable plot
    encoder = retrieve_proper_encoder(job)
    encoder.decode(training_df, job.encoding)

    shap.force_plot(explainer.expected_value, shap_values[EXPLANATION_TARGET, :],
                    training_df.iloc[EXPLANATION_TARGET, :],
                    show=False, matplotlib=True).savefig('shap_plot_train_1_3.png')