# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
debiased_model.fit(train)
#dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
dataset_debiasing_test = debiased_model.predict(test)
predictions = list(dataset_debiasing_test.labels)
predictions = [1 if y == train.favorable_label else
-1 for y in dataset_debiasing_test.labels.ravel()]
y_test = np.array([1 if y == train.favorable_label else
-1 for y in test.labels.ravel()])
x_control_test = pd.DataFrame(data=test.features,
columns=test.feature_names)[protected]
acc, sr, fdr = getStats(y_test, predictions, x_control_test)
debiased_cm = ClassificationMetric(test, dataset_debiasing_test,
unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
fdr2 = debiased_cm.false_discovery_rate_ratio()
fdr2 = min(fdr2, 1/fdr2)
assert np.isclose(fdr, fdr2)
#print(fdr, unconstrainedFDR)
assert(fdr2 >= unconstrainedFDR2)
df['race-num'] = df.race.map(mapping)
return df.fillna('Unknown')
nonbinary_ad = AdultDataset(
protected_attribute_names=['sex', 'native-country', 'race-num'],
privileged_classes=[['Male'], ['United-States'], [1]],
categorical_features=['workclass', 'education', 'marital-status',
'occupation', 'relationship', 'race'],
custom_preprocessing=custom_preprocessing)
# drop redundant race feature (not relevant to this test)
index = nonbinary_ad.feature_names.index('race-num')
nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)
nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
eps_data = dataset_metric.smoothed_empirical_differential_fairness()
assert eps_data == 2.063813731996515 # verified with reference implementation
def test_epsilon_all_groups():
    """Smoothed EDF on Adult with a non-binary protected attribute.

    Encodes race as a numeric protected attribute (the categorical copy is
    kept as a regular feature and dropped afterwards), then checks the
    smoothed empirical differential fairness epsilon of the test split
    against a value verified with the reference implementation.
    """
    def custom_preprocessing(df):
        # Slight workaround for a non-binary protected attribute: the
        # feature should stay categorical, but the protected attribute
        # must be numerical, so keep both and drop the redundant copy below.
        mapping = {'Black': 0, 'White': 1, 'Asian-Pac-Islander': 2,
                   'Amer-Indian-Eskimo': 3, 'Other': 4}
        df['race-num'] = df.race.map(mapping)
        return df.fillna('Unknown')

    nonbinary_ad = AdultDataset(
        protected_attribute_names=['sex', 'native-country', 'race-num'],
        privileged_classes=[['Male'], ['United-States'], [1]],
        categorical_features=['workclass', 'education', 'marital-status',
                              'occupation', 'relationship', 'race'],
        custom_preprocessing=custom_preprocessing)
    # drop redundant race feature (not relevant to this test)
    index = nonbinary_ad.feature_names.index('race-num')
    nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
    nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)

    nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
    dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
    eps_data = dataset_metric.smoothed_empirical_differential_fairness()
    # Compare with a tolerance instead of exact float equality: the metric
    # is a floating-point computation whose last bits may differ across
    # numpy/BLAS builds. Expected value verified with reference implementation.
    assert np.isclose(eps_data, 2.063813731996515)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from aif360.datasets import AdultDataset
from aif360.sklearn.datasets import fetch_adult
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta
# Shared fixtures for the calibrated-equalized-odds comparison test below:
# the same Adult data loaded through both APIs so the two implementations
# can be compared on identical inputs.
# sklearn-style API: feature/label frames plus the 'fnlwgt' census weights.
X, y, sample_weight = fetch_adult(numeric_only=True)
# Legacy StructuredDataset API restricted to the same numeric columns,
# weighted by 'fnlwgt' as well.
adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
'hours-per-week'], features_to_drop=[])
def test_calib_eq_odds_sex_weighted():
    """Sklearn-style CalibratedEqualizedOdds matches the original version.

    Feeds the same weighted logistic-regression scores through both the
    legacy post-processor and the sklearn-compatible one, then verifies the
    fitted mix rates for each sex group agree.
    """
    clf = LogisticRegression(solver='lbfgs', max_iter=500)
    probas = clf.fit(X, y, sample_weight=sample_weight).predict_proba(X)

    # Legacy API: attach the positive-class scores to a copy of the dataset.
    scored = adult.copy()
    scored.scores = probas[:, 1]
    orig_cal_eq_odds = CalibratedEqOddsPostprocessing(
        unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
    orig_cal_eq_odds.fit(adult, scored)

    # sklearn-style API fitted on the raw probabilities directly.
    cal_eq_odds = CalibratedEqualizedOdds('sex')
    cal_eq_odds.fit(probas, y, sample_weight=sample_weight)

    assert np.isclose(orig_cal_eq_odds.priv_mix_rate, cal_eq_odds.mix_rates_[1])
    assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])
def test_adult():
    """Adult loads with 'sex' protected and scaling is fit on train only."""
    prot = 'sex'
    dataset = AdultDataset(protected_attribute_names=[prot],
                           privileged_classes=[['Male']],
                           categorical_features=[],
                           features_to_keep=['age', 'education-num',
                                             'capital-gain', 'capital-loss',
                                             'hours-per-week'])
    train, test = dataset.split([32561])
    assert np.any(test.labels)

    # Fit the scaler on the training split only, then apply to both splits.
    scaler = MinMaxScaler(copy=False)
    train.features = scaler.fit_transform(train.features)
    test.features = scaler.transform(test.features)

    # Strip the protected column out of the model inputs.
    col = train.feature_names.index(prot)
    X_tr = np.delete(train.features, col, axis=1)
    X_te = np.delete(test.features, col, axis=1)
    y_tr = train.labels.ravel()
def test_adult_test_set():
    """Splitting the default-preprocessed Adult data at row 30162 leaves a
    non-empty labelled test partition."""
    dataset = AdultDataset()
    train, test = dataset.split([30162])
    assert np.any(test.labels)
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
# Module-level fixtures: Adult with three protected attributes, split into
# two partitions, plus logistic-regression predictions used by the metric
# tests below.
ad = AdultDataset(protected_attribute_names=['race', 'sex', 'native-country'],
privileged_classes=[['White'], ['Male'], ['United-States']],
categorical_features=['workclass', 'education',
'marital-status', 'occupation', 'relationship'],
custom_preprocessing=lambda df: df.fillna('Unknown'))
# NOTE(review): the first 16281 rows go to `adult_test` and the remainder to
# `adult_train` — confirm this matches the row ordering of the underlying
# data files (train rows first vs. test rows first).
adult_test, adult_train = ad.split([16281], shuffle=False)
# Standardize features: fit the scaler on the training partition, apply to both.
scaler = StandardScaler()
X = scaler.fit_transform(adult_train.features)
test_X = scaler.transform(adult_test.features)
clf = LogisticRegression(C=1.0, random_state=0, solver='liblinear')
# Copy of the test partition carrying the classifier's hard predictions.
adult_pred = adult_test.copy()
adult_pred.labels = clf.fit(X, adult_train.labels.ravel()).predict(test_X)
# Metrics over the ground-truth labels and over the predicted labels.
dataset_metric = BinaryLabelDatasetMetric(adult_test)
classifier_metric = BinaryLabelDatasetMetric(adult_pred)
def test_adult():
    """Fit a tau=0 MetaFairClassifier on Adult and build the classification
    metric for its test-set predictions (the unconstrained baseline here,
    per the `biased_model` naming)."""
    np.random.seed(1)
    prot = 'sex'
    dataset = AdultDataset(protected_attribute_names=[prot],
                           privileged_classes=[['Male']],
                           categorical_features=[],
                           features_to_keep=['age', 'education-num',
                                             'capital-gain', 'capital-loss',
                                             'hours-per-week'])
    train, test = dataset.split([32561])

    biased_model = MetaFairClassifier(tau=0, sensitive_attr=prot)
    biased_model.fit(train)
    dataset_bias_test = biased_model.predict(test)
    biased_cm = ClassificationMetric(test, dataset_bias_test,
                                     unprivileged_groups=[{prot: 0}],
                                     privileged_groups=[{prot: 1}])
def test_k_folds():
    """Integer split(k) yields k folds whose per-fold arrays agree in length.

    Relies on the module-level `df` fixture (4 rows, columns including
    'two' and 'label').
    """
    sd = StructuredDataset(df=df, label_names=['label'],
                           protected_attribute_names=['two'])

    quarters = sd.split(4)
    assert len(quarters) == 4
    # Every component of each quarter-fold describes the same single row.
    for fold in quarters:
        n = fold.features.shape[0]
        assert (n == fold.labels.shape[0]
                == fold.protected_attributes.shape[0]
                == len(fold.instance_names)
                == fold.instance_weights.shape[0] == 1)

    thirds = sd.split(3)
    assert thirds[0].features.shape[0] == 2
[1, 0],
[1, 1],
[1, 0],
[1, 0],
[2, 1],
[2, 0],
[2, 1],
[2, 1]])
pred = data.copy()
pred[[3, 9], -1] = 0
pred[[4, 5], -1] = 1
df = pd.DataFrame(data, columns=['feat', 'label'])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld = BinaryLabelDataset(df=df, label_names=['label'],
protected_attribute_names=['feat'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)
assert cm.theil_index() == 4*np.log(2)/10