How to use aif360 - 10 common examples

To help you get started, we’ve selected a few aif360 examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IBM / AIF360 / tests / test_meta_classifier.py View on Github external
# NOTE(review): fragment — the enclosing test function's header and the
# definitions of train/test/debiased_model/getStats/unconstrainedFDR2 are
# outside this excerpt (first line's indentation was stripped by the scrape).
debiased_model.fit(train)

    #dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
    # Score the held-out split with the debiased model.
    dataset_debiasing_test = debiased_model.predict(test)

    predictions = list(dataset_debiasing_test.labels)
    # Re-encode labels as +1 (favorable) / -1 for getStats; the first
    # `predictions` assignment above is immediately overwritten here.
    predictions = [1 if y == train.favorable_label else
                  -1 for y in dataset_debiasing_test.labels.ravel()]
    y_test = np.array([1 if y == train.favorable_label else
                      -1 for y in test.labels.ravel()])
    # Protected-attribute column extracted from the raw feature matrix.
    x_control_test = pd.DataFrame(data=test.features,
                                  columns=test.feature_names)[protected]

    acc, sr, fdr = getStats(y_test, predictions, x_control_test)

    # Cross-check getStats' FDR ratio against AIF360's ClassificationMetric.
    debiased_cm = ClassificationMetric(test, dataset_debiasing_test,
        unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
    fdr2 = debiased_cm.false_discovery_rate_ratio()
    # Fold the ratio into (0, 1] so the comparison is direction-agnostic.
    fdr2 = min(fdr2, 1/fdr2)
    assert np.isclose(fdr, fdr2)
    #print(fdr, unconstrainedFDR)
    assert(fdr2 >= unconstrainedFDR2)
github IBM / AIF360 / tests / test_differential_fairness.py View on Github external
# NOTE(review): fragment — this is the tail of a test whose header and the
# `mapping` dict are outside this excerpt; the first line's indentation was
# stripped by the scrape (it belongs inside a nested custom_preprocessing).
df['race-num'] = df.race.map(mapping)
        return df.fillna('Unknown')

    nonbinary_ad = AdultDataset(
            protected_attribute_names=['sex', 'native-country', 'race-num'],
            privileged_classes=[['Male'], ['United-States'], [1]],
            categorical_features=['workclass', 'education', 'marital-status',
                                  'occupation', 'relationship', 'race'],
            custom_preprocessing=custom_preprocessing)
    # drop redundant race feature (not relevant to this test)
    index = nonbinary_ad.feature_names.index('race-num')
    nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
    nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)

    # First 16281 rows are taken as the test portion (shuffle=False keeps
    # the dataset's original row order) — presumably the standard Adult
    # test-set size; confirm against the dataset loader.
    nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
    dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
    eps_data = dataset_metric.smoothed_empirical_differential_fairness()
    assert eps_data == 2.063813731996515  # verified with reference implementation
github IBM / AIF360 / tests / test_differential_fairness.py View on Github external
def test_epsilon_all_groups():
    """Smoothed empirical differential fairness on Adult with a non-binary
    race attribute matches the value from the reference implementation."""
    def custom_preprocessing(df):
        # slight workaround for non-binary protected attribute:
        # feature should be categorical but protected attribute numerical
        race_codes = {'Black': 0, 'White': 1, 'Asian-Pac-Islander': 2,
                      'Amer-Indian-Eskimo': 3, 'Other': 4}
        df['race-num'] = df.race.map(race_codes)
        return df.fillna('Unknown')

    dataset = AdultDataset(
            protected_attribute_names=['sex', 'native-country', 'race-num'],
            privileged_classes=[['Male'], ['United-States'], [1]],
            categorical_features=['workclass', 'education', 'marital-status',
                                  'occupation', 'relationship', 'race'],
            custom_preprocessing=custom_preprocessing)

    # The one-hot 'race' feature duplicates 'race-num'; drop the numeric
    # copy from the feature matrix (not relevant to this test).
    drop_idx = dataset.feature_names.index('race-num')
    dataset.features = np.delete(dataset.features, drop_idx, axis=1)
    dataset.feature_names = np.delete(dataset.feature_names, drop_idx)

    # First 16281 rows form the evaluation split; no shuffling.
    eval_split, _ = dataset.split([16281], shuffle=False)
    metric = BinaryLabelDatasetMetric(eval_split)
    eps = metric.smoothed_empirical_differential_fairness()
    assert eps == 2.063813731996515  # verified with reference implementation
github IBM / AIF360 / tests / sklearn / test_calibrated_equalized_odds.py View on Github external
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from aif360.datasets import AdultDataset
from aif360.sklearn.datasets import fetch_adult
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta


# Module-level fixtures: load Adult twice — sklearn-style arrays via
# fetch_adult, and the legacy AdultDataset restricted to five numeric
# features — presumably so both API flavours see comparable inputs
# (TODO confirm the two loaders yield the same rows/columns).
X, y, sample_weight = fetch_adult(numeric_only=True)
adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
                          'hours-per-week'], features_to_drop=[])

def test_calib_eq_odds_sex_weighted():
    """The sklearn-style CalibratedEqualizedOdds learns the same mix rates
    as the original CalibratedEqOddsPostprocessing on weighted Adult data."""
    clf = LogisticRegression(solver='lbfgs', max_iter=500)
    probas = clf.fit(X, y, sample_weight=sample_weight).predict_proba(X)

    # Legacy API: scores live on a copy of the structured dataset.
    scored = adult.copy()
    scored.scores = probas[:, 1]
    legacy = CalibratedEqOddsPostprocessing(
            unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
    legacy.fit(adult, scored)

    # sklearn API: fit directly on probabilities and labels.
    sklearn_style = CalibratedEqualizedOdds('sex')
    sklearn_style.fit(probas, y, sample_weight=sample_weight)

    # mix_rates_[1] corresponds to the privileged group, [0] to unprivileged.
    assert np.isclose(legacy.priv_mix_rate, sklearn_style.mix_rates_[1])
    assert np.isclose(legacy.unpriv_mix_rate, sklearn_style.mix_rates_[0])
github IBM / AIF360 / tests / test_disparate_impact_remover.py View on Github external
def test_adult():
    """Set up a scaled Adult train/test split with 'sex' protected.

    NOTE(review): fragment — the excerpt ends at the `y_tr` assignment;
    the remainder of this test is outside the visible source.
    """
    protected = 'sex'
    ad = AdultDataset(protected_attribute_names=[protected],
        privileged_classes=[['Male']], categorical_features=[],
        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'])

    scaler = MinMaxScaler(copy=False)
    # ad.features = scaler.fit_transform(ad.features)

    train, test = ad.split([32561])
    assert np.any(test.labels)

    # Fit the scaler on train only; reuse it on test to avoid leakage.
    train.features = scaler.fit_transform(train.features)
    test.features = scaler.transform(test.features)

    # Remove the protected column from the model inputs.
    index = train.feature_names.index(protected)
    X_tr = np.delete(train.features, index, axis=1)
    X_te = np.delete(test.features, index, axis=1)
    y_tr = train.labels.ravel()
github IBM / AIF360 / tests / test_standard_datasets.py View on Github external
def test_adult_test_set():
    """Splitting Adult at index 30162 leaves a non-empty labelled test split."""
    dataset = AdultDataset()
    # 30162 is used instead of the raw 32561 boundary — presumably the row
    # count after preprocessing drops incomplete records; TODO confirm.
    _, holdout = dataset.split([30162])
    assert np.any(holdout.labels)
github IBM / AIF360 / tests / test_differential_fairness.py View on Github external
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

# Module-level fixtures shared by the tests in this file.
# NOTE(review): fragment — the excerpt ends after classifier_metric; any
# further module-level setup is outside the visible source.
ad = AdultDataset(protected_attribute_names=['race', 'sex', 'native-country'],
                  privileged_classes=[['White'], ['Male'], ['United-States']],
                  categorical_features=['workclass', 'education',
                          'marital-status', 'occupation', 'relationship'],
                  custom_preprocessing=lambda df: df.fillna('Unknown'))
# First 16281 rows become the test split; shuffle=False keeps row order.
adult_test, adult_train = ad.split([16281], shuffle=False)

# Standardize on the training split only; apply the same transform to test.
scaler = StandardScaler()
X = scaler.fit_transform(adult_train.features)
test_X = scaler.transform(adult_test.features)
clf = LogisticRegression(C=1.0, random_state=0, solver='liblinear')

# Predicted-label dataset: a copy of the test split whose labels are
# replaced by logistic-regression predictions (clf is fit here, inline).
adult_pred = adult_test.copy()
adult_pred.labels = clf.fit(X, adult_train.labels.ravel()).predict(test_X)

# Metrics over the true test labels and over the predicted labels.
dataset_metric = BinaryLabelDatasetMetric(adult_test)
classifier_metric = BinaryLabelDatasetMetric(adult_pred)
github IBM / AIF360 / tests / test_meta_classifier.py View on Github external
def test_adult():
    """Fit a tau=0 (unconstrained) MetaFairClassifier on Adult and build its
    classification metric.

    NOTE(review): fragment — the excerpt ends after biased_cm; the rest of
    the test (debiased model, assertions) is outside the visible source.
    """
    # Seed for reproducibility of the stochastic classifier.
    np.random.seed(1)
    # np.random.seed(9876)

    protected = 'sex'
    ad = AdultDataset(protected_attribute_names=[protected],
                      privileged_classes=[['Male']], categorical_features=[],
                      features_to_keep=['age', 'education-num', 'capital-gain',
                                        'capital-loss', 'hours-per-week'])

    #scaler = MinMaxScaler(copy=False)
    # ad.features = scaler.fit_transform(ad.features)

    train, test = ad.split([32561])

    # tau=0: no fairness constraint — this is the biased baseline.
    biased_model = MetaFairClassifier(tau=0, sensitive_attr=protected)
    biased_model.fit(train)

    dataset_bias_test = biased_model.predict(test)

    biased_cm = ClassificationMetric(test, dataset_bias_test,
        unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
github IBM / AIF360 / tests / test_structured_dataset.py View on Github external
def test_k_folds():
    """split(k) with an int produces k folds whose per-fold arrays all
    agree on the number of rows."""
    dataset = StructuredDataset(df=df, label_names=['label'],
                                protected_attribute_names=['two'])

    quarters = dataset.split(4)
    assert len(quarters) == 4
    # Every array attribute of each fold carries exactly one row.
    for fold in quarters:
        rows = fold.features.shape[0]
        assert rows == 1
        assert fold.labels.shape[0] == rows
        assert fold.protected_attributes.shape[0] == rows
        assert len(fold.instance_names) == rows
        assert fold.instance_weights.shape[0] == rows

    # With 3 folds the first fold absorbs the remainder row.
    thirds = dataset.split(3)
    assert thirds[0].features.shape[0] == 2
github IBM / AIF360 / tests / test_classification_metric.py View on Github external
# NOTE(review): fragment — the np.array literal containing these [feat, label]
# rows opens before this excerpt (the first rows and the `data = np.array(`
# call are outside the visible source).
[1, 0],
                     [1, 1],
                     [1, 0],
                     [1, 0],
                     [2, 1],
                     [2, 0],
                     [2, 1],
                     [2, 1]])
    pred = data.copy()
    # Perturb predictions: set the label column to 0 for rows 3 and 9,
    # and to 1 for rows 4 and 5.
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df, label_names=['label'],
        protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
        protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    # Exact closed-form expected value for this fixture.
    assert cm.theil_index() == 4*np.log(2)/10