How to use the shap.TreeExplainer function in shap

To help you get started, we’ve selected a few shap.TreeExplainer examples based on popular ways it is used in public projects.

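Before the project examples below, here is a minimal sketch of the typical TreeExplainer workflow, assuming a scikit-learn RandomForestRegressor trained on synthetic data (the model and data here are illustrative, not taken from the projects below):

import numpy as np
import shap
from sklearn.ensemble import RandomForestRegressor

# illustrative synthetic regression data; any tabular X, y works the same way
X = np.random.randn(200, 5)
y = X[:, 0] + 2 * X[:, 1] + 0.1 * np.random.randn(200)

model = RandomForestRegressor(n_estimators=50).fit(X, y)

# TreeExplainer computes SHAP values for tree ensembles
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

# local accuracy: SHAP values plus the expected value reconstruct each prediction,
# the same property several of the tests below assert
assert np.allclose(shap_values.sum(axis=1) + explainer.expected_value, model.predict(X))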

github slundberg / shap / tests / explainers / test_tree.py
def test_isolation_forest():
    import shap
    import numpy as np
    from sklearn.ensemble import IsolationForest
    from sklearn.ensemble.iforest import _average_path_length

    X, y = shap.datasets.boston()
    iso = IsolationForest(behaviour='new', contamination='auto')
    iso.fit(X)

    explainer = shap.TreeExplainer(iso)
    shap_values = explainer.shap_values(X)

    score_from_shap = - 2**(
        - (np.sum(shap_values, axis=1) + explainer.expected_value) /
        _average_path_length(np.array([iso.max_samples_]))[0]
        )
    assert np.allclose(iso.score_samples(X), score_from_shap, atol=1e-7)

github slundberg / shap / tests / explainers / test_tree.py
max_depth = 6

# train a model with single tree
Xd = xgboost.DMatrix(X, label=y)
model = xgboost.train({'eta': 1,
                       'max_depth': max_depth,
                       'base_score': 0,
                       "lambda": 0},
                      Xd, 1)
ypred = model.predict(Xd)

# Compare for five random samples
for i in range(5):
    x_ind = np.random.choice(X.shape[1])
    x = X[x_ind:x_ind+1, :]

    expl = shap.TreeExplainer(model, X, feature_perturbation="interventional")
    f = lambda inp: model.predict(xgboost.DMatrix(inp))
    expl_kern = shap.KernelExplainer(f, X)

    itshap = expl.shap_values(x)
    kshap = expl_kern.shap_values(x, nsamples=150)
    assert np.allclose(itshap, kshap), \
        "Kernel SHAP doesn't match Independent Tree SHAP!"
    assert np.allclose(itshap.sum() + expl.expected_value, ypred[x_ind]), \
        "SHAP values don't sum to model output!"

github slundberg / shap / tests / explainers / test_tree.py
problem_bounds = [(-1e6, 3e6), (-1e6, 3e6)]

# Don't worry about "objective has been evaluated" warnings.
result_et = skopt.forest_minimize(objective_function, problem_bounds, n_calls=100, base_estimator="ET")
result_rf = skopt.forest_minimize(objective_function, problem_bounds, n_calls=100, base_estimator="RF")

et_df = pd.DataFrame(result_et.x_iters, columns=["X0", "X1"])

# Explain the model's predictions.
explainer_et = shap.TreeExplainer(result_et.models[-1], et_df)
shap_values_et = explainer_et.shap_values(et_df)

rf_df = pd.DataFrame(result_rf.x_iters, columns=["X0", "X1"])

# Explain the model's predictions (random forest).
explainer_rf = shap.TreeExplainer(result_rf.models[-1], rf_df)
shap_values_rf = explainer_rf.shap_values(rf_df)

assert np.allclose(shap_values_et.sum(1) + explainer_et.expected_value, result_et.models[-1].predict(et_df))
assert np.allclose(shap_values_rf.sum(1) + explainer_rf.expected_value, result_rf.models[-1].predict(rf_df))

github slundberg / shap / tests / explainers / test_tree.py
y = np.matmul(X, b)
max_depth = 6

# train a model with 20 boosting rounds
Xd = xgboost.DMatrix(X, label=y)
model = xgboost.train({'eta': 1,
                       'max_depth': max_depth,
                       'base_score': 0,
                       "lambda": 0},
                      Xd, 20)
ypred = model.predict(Xd)

# Compare for five random samples
for i in range(5):
    x_ind = np.random.choice(X.shape[1])
    x = X[x_ind:x_ind+1, :]
    expl = shap.TreeExplainer(model, X, feature_perturbation="interventional")
    itshap = expl.shap_values(x)
    assert np.allclose(itshap.sum() + expl.expected_value, ypred[x_ind]), \
        "SHAP values don't sum to model output!"

github biolab / orange3-prototypes / orangecontrib / prototypes / explanation / explainer.py
    transformed_data: Table,
    transformed_reference_data: Table,
    progress_callback: Callable,
) -> Tuple[
    Optional[List[np.ndarray]], Optional[np.ndarray], Optional[np.ndarray]
]:
    """
    Computes and returns SHAP values for learners that are explained by
    TreeExplainer: all sci-kit models based on trees. In case that explanation
    with TreeExplainer is not possible it returns None
    """
    if sparse.issparse(transformed_data.X):
        # sparse not supported by TreeExplainer, KernelExplainer can handle it
        return None, None, None
    try:
        explainer = TreeExplainer(
            model.skl_model, data=sample(transformed_reference_data.X, 100),
        )
    except (SHAPError, AttributeError):
        return None, None, None

    # TreeExplainer cannot explain more than 1000 cases in a reasonable time
    data_sample, sample_mask = _subsample_data(transformed_data, 1000)
    num_classes = (
        len(transformed_data.domain.class_var.values)
        if transformed_data.domain.class_var.is_discrete
        else None
    )

    # this method works in batches, since explaining only one attribute at a
    # time doubles the processing time compared to a batch size of 10
    shap_values = []
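The docstring above notes that TreeExplainer only covers tree-based models, with the Orange wrapper returning None so the caller can fall back to another explainer. A minimal sketch of that try-first, fall-back pattern outside Orange (the function name and the model and background arguments are illustrative assumptions, not part of the project):

import shap

def shap_values_with_fallback(model, background, X):
    # Try the fast, tree-specific TreeExplainer first; fall back to the
    # model-agnostic KernelExplainer when the model is not a supported tree ensemble.
    try:
        explainer = shap.TreeExplainer(model, data=background)
    except Exception:
        explainer = shap.KernelExplainer(model.predict, background)
    return explainer.shap_values(X)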

github Ashton-Sidhu / aethos / aethos / model_analysis / model_explanation.py
        import lightgbm as lgb
        import shap

        self.model = model
        self.model_name = model_name
        self.x_train = x_train
        self.x_test = x_test
        self.y_test = y_test

        if learner == "linear":
            self.explainer = shap.LinearExplainer(
                self.model, self.x_train, feature_dependence="independent"
            )
        elif learner == "tree":
            self.explainer = shap.TreeExplainer(self.model)
        elif learner == "kernel":
            if hasattr(self.model, "predict_proba"):
                func = self.model.predict_proba
            else:
                func = self.model.predict

            self.explainer = shap.KernelExplainer(func, self.x_train)
        else:
            raise ValueError(f"Learner: {learner} is not supported yet.")

        self.expected_value = self.explainer.expected_value
        self.shap_values = np.array(self.explainer.shap_values(self.x_test)).astype(
            float
        )

        if isinstance(self.model, lgb.sklearn.LGBMClassifier) and isinstance(

github uber / causalml / causalml / inference / meta / explainer.py
    def get_shap_values(self):
        """
        Calculates Shapley values for each treatment group.
        """
        shap_dict = {}
        for group, mod in self.models_tau.items():
            explainer = shap.TreeExplainer(mod)
            if self.r_learners is not None:
                explainer.model.original_model.params['objective'] = None  # hacky way of running shap without error
            shap_values = explainer.shap_values(self.X)
            shap_dict[group] = shap_values

        return shap_dict

github nirdizati-research / predict-python / src / explanation / shap_wrapper.py
def explain(shap_exp: Explanation, training_df, test_df, explanation_target):
    job = shap_exp.job
    model = joblib.load(job.predictive_model.model_path)
    model = model[0]
    shap.initjs()

    explainer = shap.TreeExplainer(model)
    merged_df = pd.concat([training_df, test_df])
    shap_values = explainer.shap_values(merged_df.drop(['trace_id', 'label'], 1))

    encoder = retrieve_proper_encoder(job)
    encoder.decode(merged_df, job.encoding)
    encoder.decode(test_df, job.encoding)

    explanation_target_int = merged_df[merged_df['trace_id'] == explanation_target].index.item() + \
                             training_df.drop(['trace_id', 'label'], 1).shape[0]

    explanation_target_vector = test_df[test_df['trace_id'] == explanation_target].drop(['trace_id', 'label'], 1)
    expected_value = explainer.expected_value[0] if len(explainer.expected_value) > 1 else explainer.expected_value
    shap_value = (shap_values[explanation_target_int, :] if hasattr(shap_values, "size")
                  else shap_values[0][explanation_target_int, :])
    name = create_unique_name("temporal_shap.svg")
    shap.force_plot(expected_value, shap_value, explanation_target_vector,

github nirdizati-research / predict-python / src / jobs / management / commands / try_shap.py
    def handle(self, *args, **kwargs):

        TARGET_MODEL = 68
        job = Job.objects.filter(pk=TARGET_MODEL)[0]
        model = joblib.load(job.predictive_model.model_path)
        model = model[0]
        training_df, test_df = get_encoded_logs(job)

        EXPLANATION_TARGET = 2_3300
        FEATURE_TARGET = 1
        shap.initjs()

        explainer = shap.TreeExplainer(model)
        training_df = training_df.drop(['trace_id','label'], 1)

        shap_values = explainer.shap_values(training_df)

        encoder = retrieve_proper_encoder(job)
        encoder.decode(training_df, job.encoding)

        shap.force_plot(explainer.expected_value, shap_values[EXPLANATION_TARGET, :],
                        training_df.iloc[EXPLANATION_TARGET, :],
                        show=False, matplotlib=True).savefig('shap_plot_train_1_3.png')