This notebook provides a condensed replication of the empirical results of the paper Fairness-Aware and Interpretable Policy Learning by Nora Bearth, Michael Lechner, Jana Mareckova and Fabian Muny.
The functions used for the analysis can be accessed with the
fairpolicytree
package, which can be installed from github
using install_github("fmuny/fairpolicytree")
. This is an
extension of the policytree
package (Sverdrup,
Kanodia, Zhou, Athey, Wager, 2020). To start, load the required
packages and define basic parameters.
# Packages
library(fairpolicytree)
library(policytree)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(rcompanion)
library(BayesFactor)
library(patchwork)
library(stringr)
library(knitr)
library(kableExtra)
library(cluster)
# Data source path
DATA_PATH <- "Q:/SEW/Projekte/NFP77/BeLeMaMu.Fairness/Data_fair/cleaned"
SET_STR = "05_application"
OUTCOME = "outcome0131"
# Method for ties in fairness adjustments
# One of "average", "first", "last", "random", "max", "min"
# see https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rank
TIES_METHOD <- 'random'
# Depth of policy trees
PT_DEPTH <- 3
PT_SEARCH_DEPTH <- PT_DEPTH
# If PT_SEARCH_DEPTH = PT_DEPTH then an exact tree is built.
# If PT_SEARCH_DEPTH < PT_DEPTH then a hybrid tree is built.
# See https://grf-labs.github.io/policytree/reference/hybrid_policy_tree.html
# Set seed for replicability
SEED = 12345
set.seed(SEED)
The analysis is based on the Swiss Active Labor Market Policy
Evaluation Dataset (administrative data), which can be downloaded from
https://doi.org/10.23662/FORS-DS-1203-1. The raw data
has been prepared in the file data_cleaning.py
, the scores
have been estimated using the Modified Causal Forest (MCF, Lechner 2018) in the file
data_cleaning.py
.
# Load and prepare data (add original outcome): Training sample
outcomes_pr <- read.csv(file.path(DATA_PATH, "data_clean_pr.csv"))
training_df <- read.csv(file.path(paste0(DATA_PATH, "/", SET_STR, "_", OUTCOME), "iates_pr.csv"))
outcomes_pr <- outcomes_pr[OUTCOME] %>% slice(training_df$id_mcf+1)
training_df <- cbind(training_df, outcomes_pr)
# Evaluation sample
outcomes_ev <- read.csv(file.path(DATA_PATH, "data_clean_ev.csv"))
evaluation_df <- read.csv(file.path(paste0(DATA_PATH, "/", SET_STR, "_", OUTCOME), "iates_ev.csv"))
outcomes_ev <- outcomes_ev[OUTCOME] %>% slice(evaluation_df$id_mcf+1)
evaluation_df <- cbind(evaluation_df, outcomes_ev)
# Create combined sensitive attribute
training_df$sens_comb <- factor(apply(
training_df[c("female", "swiss")], 1, function(x) paste(x, collapse = "_")))
evaluation_df$sens_comb <- factor(apply(
evaluation_df[c("female", "swiss")], 1, function(x) paste(x, collapse = "_")))
# Load variable lists and mappings
columns <- read.csv(file.path(DATA_PATH, "columns.csv"))
mappings <- read.csv(file.path(DATA_PATH, "mappings.csv"))
# Define variables
VAR_S_NAME_ORD <- columns$S_ord[columns$S_ord > 0]
VAR_A_NAME_ORD <- columns$A_ord[columns$A_ord > 0]
VAR_ID_NAME <- "id_mcf"
VAR_D_NAME <- columns$D[columns$D > 0]
unique_D <- sort(unique(training_df[[VAR_D_NAME]]))
unique_D_ch <- as.character(sort(unique(training_df[[VAR_D_NAME]])))
nunique_D <- length(unique_D)
VAR_POLSCORE_NAME <- sapply(0:(nunique_D-1), function(x) paste0(
OUTCOME, "_lc", x, "_un_lc_pot_eff"))
To get a first overview of the data, we plot the distributions of the variables of interest.
df_plot <- training_df %>%
select(all_of(c(VAR_POLSCORE_NAME, VAR_A_NAME_ORD, VAR_S_NAME_ORD))) %>%
pivot_longer(
cols = all_of(c(VAR_POLSCORE_NAME, VAR_A_NAME_ORD, VAR_S_NAME_ORD)),
names_to = "variable",
values_to = "value")
ggplot(
df_plot, aes(x = value)) +
geom_histogram(aes(y = after_stat(density)), colour = 'gray', bins=32, fill = "white") +
geom_density(alpha = 0.5) +
theme_minimal() +
facet_wrap(~variable, scales='free')
Next, the MQ-adjustment is performed on both the decision-relevant variables and the scores. We find that the MQ-adjustment approximately preserves the moments of the original variables but reduces the differences between the sensitive groups, as shown in the plots.
# Run MQ adjustments
datasets <- list(training = training_df, evaluation = evaluation_df)
vars_list <- list(scores = VAR_POLSCORE_NAME, As = VAR_A_NAME_ORD)
scores_adjusted <- list()
for (data_name in names(datasets)) {
df <- datasets[[data_name]]
scores_adjusted[[data_name]] <- list()
for (vars in names(vars_list)) {
cols <- vars_list[[vars]]
scores_adjusted[[data_name]][[vars]] <- mq_adjustment(
vars = df[cols],
sens = df[VAR_S_NAME_ORD],
seed = SEED,
ties.method = TIES_METHOD,
quantile.type = 4
)
if(vars=="As"){
scores_adjusted[[data_name]][[vars]]$vars_mq <- round(
scores_adjusted[[data_name]][[vars]]$vars_mq)
}
}
}
# Extract results
scores_org <- training_df[VAR_POLSCORE_NAME]
scores_cdf <- scores_adjusted$training$scores$vars_cdf
scores_mq <- scores_adjusted$training$scores$vars_mq
As_org <- training_df[VAR_A_NAME_ORD]
As_cdf <- scores_adjusted$training$As$vars_cdf
As_mq <- scores_adjusted$training$As$vars_mq
sens_org <- training_df[VAR_S_NAME_ORD]
sens_comb_org <- training_df['sens_comb']
scores_org_out <- evaluation_df[VAR_POLSCORE_NAME]
scores_cdf_out <- scores_adjusted$evaluation$scores$vars_cdf
scores_mq_out <- scores_adjusted$evaluation$scores$vars_mq
As_org_out <- evaluation_df[VAR_A_NAME_ORD]
As_cdf_out <- scores_adjusted$evaluation$As$vars_cdf
As_mq_out <- scores_adjusted$evaluation$As$vars_mq
sens_org_out <- evaluation_df[VAR_S_NAME_ORD]
sens_comb_org_out <- evaluation_df['sens_comb']
# Put into one df
training_fair_df <- cbind(training_df, scores_cdf, scores_mq, As_cdf, As_mq)
evaluation_fair_df <- cbind(evaluation_df, scores_cdf_out, scores_mq_out, As_cdf_out, As_mq_out)
# Print means and standard deviations
table_scores <- data.frame(
scores_mean = colMeans(training_df[VAR_POLSCORE_NAME]),
scores_cdf_mean = colMeans(scores_cdf),
scores_mq_mean = colMeans(scores_mq),
scores_std = sqrt(diag(var(training_df[VAR_POLSCORE_NAME]))),
scores_cdf_std = sqrt(diag(var(scores_cdf))),
scores_mq_std = sqrt(diag(var(scores_mq))))
knitr::kable(
table_scores , digits=2,
caption="Means and standard deviations of scores before and after adjustment")
scores_mean | scores_cdf_mean | scores_mq_mean | scores_std | scores_cdf_std | scores_mq_std | |
---|---|---|---|---|---|---|
outcome0131_lc0_un_lc_pot_eff | 14.59 | 0.5 | 14.59 | 4.47 | 0.29 | 4.47 |
outcome0131_lc1_un_lc_pot_eff | 13.76 | 0.5 | 13.76 | 4.24 | 0.29 | 4.24 |
outcome0131_lc2_un_lc_pot_eff | 17.34 | 0.5 | 17.34 | 4.60 | 0.29 | 4.61 |
outcome0131_lc3_un_lc_pot_eff | 17.88 | 0.5 | 17.88 | 4.90 | 0.29 | 4.90 |
outcome0131_lc4_un_lc_pot_eff | 15.79 | 0.5 | 15.79 | 4.99 | 0.29 | 4.99 |
outcome0131_lc5_un_lc_pot_eff | 14.68 | 0.5 | 14.67 | 4.53 | 0.29 | 4.53 |
table_A <- data.frame(
As_mean = colMeans(training_df[VAR_A_NAME_ORD]),
As_cdf_mean = colMeans(As_cdf),
As_mq_mean = colMeans(As_mq),
As_std = sqrt(diag(var(training_df[VAR_A_NAME_ORD]))),
As_cdf_std = sqrt(diag(var(As_cdf))),
As_mq_std = sqrt(diag(var(As_mq))))
knitr::kable(
table_A, digits=2,
caption="Means and standard deviations of decision-relevant variables before and after adjustment")
As_mean | As_cdf_mean | As_mq_mean | As_std | As_cdf_std | As_mq_std | |
---|---|---|---|---|---|---|
age | 36.72 | 0.5 | 36.72 | 8.73 | 0.29 | 8.73 |
past_income | 42352.23 | 0.5 | 42350.78 | 20364.41 | 0.29 | 20370.66 |
qual_degree | 0.57 | 0.5 | 0.57 | 0.49 | 0.29 | 0.49 |
# Plot distributions of scores and A for combinations of S
vars_to_plot <- c(VAR_POLSCORE_NAME, VAR_A_NAME_ORD)
vars_to_plot <- c(vars_to_plot, paste0(vars_to_plot, "_mq"))
vars_to_plot_sens <- c(vars_to_plot, 'sens_comb')
map_prog <- setNames(
as.character(mappings[!is.na(mappings[VAR_D_NAME]),'X']),
as.character(mappings[!is.na(mappings[VAR_D_NAME]), VAR_D_NAME]))
df_fullplot <- training_fair_df[, vars_to_plot_sens] %>% pivot_longer(all_of(vars_to_plot))
df_fullplot <- df_fullplot %>% mutate(
type = if_else(str_ends(name, "_mq"), "Adjusted variable", "Original variable"),
name = str_remove(name, "_mq"),
program_number = str_match(name, paste0(OUTCOME, "_lc([0-9]+)_un_lc_pot_eff"))[, 2],
name = if_else(!is.na(program_number), paste0("Score (", map_prog[program_number], ")"), name),
name = if_else(name == "age", "Age", name),
name = if_else(name == "past_income", "Past earnings", name),
name = if_else(name == "qual_degree", "Degree", name)
)
df_fullplot <- df_fullplot %>% mutate(name_type = paste0(name, "\n(", type, ")"))
df_fullplot$type <- factor(df_fullplot$type, levels = c("Original variable", "Adjusted variable"))
levs <- c(
sort(unique(df_fullplot$name_type[str_ends(df_fullplot$name_type, "\\(Original variable\\)")])),
sort(unique(df_fullplot$name_type[str_ends(df_fullplot$name_type, "\\(Adjusted variable\\)")]))
)
df_fullplot$name_type <- factor(df_fullplot$name_type, levels = levs)
df_fullplot_main <- df_fullplot[is.na(df_fullplot$program_number) | df_fullplot$program_number == 0,]
df_fullplot_appendix <- df_fullplot[!is.na(df_fullplot$program_number) & df_fullplot$program_number > 0,]
histograms_cleaning <- ggplot(df_fullplot_main, aes(x = value, fill = sens_comb)) +
geom_histogram(aes(y = after_stat(density)), alpha = 0.5, position = "identity", bins = 32) +
labs(x=NULL, y = "Density", fill="Sensitive attribue:") +
theme_minimal() +
facet_wrap(~name_type, scales = "free", ncol = 4) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_fill_discrete(
labels = c("0_0" = "Foreign men", "1_0" = "Foreign women", "0_1" = "Swiss men", "1_1" = "Swiss women")) +
theme(legend.position = "bottom", text = element_text(size = 10))
plot(histograms_cleaning)
histograms_cleaning_appendix <- ggplot(df_fullplot_appendix, aes(x = value, fill = sens_comb)) +
geom_histogram(aes(y = after_stat(density)), alpha = 0.5, position = "identity", bins = 32) +
labs(x=NULL, y = "Density", fill="Sensitive attribue:") +
theme_minimal() +
facet_wrap(~name_type, scales = "free", ncol = 5) +
scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
scale_fill_discrete(
labels = c("0_0" = "Foreign men", "1_0" = "Foreign women", "0_1" = "Swiss men", "1_1" = "Swiss women")) +
theme(legend.position = "bottom", text = element_text(size = 10))
plot(histograms_cleaning_appendix)
The analysis of the policy value-interpretability-fairness trade-off starts by analyzing several benchmark policies. This includes the observed assignments and blackbox assignments according to the largest score. By using the MQ-adjusted policy scores we can obtain a fairness-aware blackbox assignment. In addition, we can obtain a completely fair benchmark (according to our definition) which assigns all individuals to the same treatment.
# Create functions for evaluation of assignments
# Policy value
compute_policyvalue <- function(Dstar, scores){
outcomes <- mapply(function(idx, row) scores[
row, idx], Dstar, seq_along(Dstar))
return(mean(outcomes))
}
# Cramer's V
compute_cramerv <- function(Dstar, sens_comb){
cramerV(table(Dstar, sens_comb))
}
# Chi2 test
compute_chi2 <- function(Dstar, sens_comb){
round(chisq.test(table(Dstar, sens_comb))$p.value, 8)
}
# Bayes Factor
compute_BF <- function(Dstar, sens_comb){
round(extractBF(contingencyTableBF(table(
Dstar, sens_comb), sampleType = "indepMulti", fixedMargin = "cols"), onlybf=TRUE, logbf=TRUE), 8)
}
# Program frequencies
compute_prog_freq <- function(Dstar, levels=unique_D){
prop.table(table(factor(Dstar, levels = unique_D)))
}
# Observed, blackbox and all-in-one assignments for training and evaluation sample
results_list <- list()
datasets <- list(training = training_fair_df, evaluation = evaluation_fair_df)
rows <- c("observed", "blackbox", "blackboxfair", "allin1")
cols <- c("Interpr.", "Policy value", "CramerV", "chi2 pval", "log(BF)", unique_D_ch)
for (set_name in names(datasets)) {
df <- datasets[[set_name]]
res <- matrix(NA, nrow = length(rows), ncol = length(cols),
dimnames = list(rows, cols))
# Observed assignment
D_obs <- df[[VAR_D_NAME]]
res["observed", ] <- c(
FALSE,
mean(df[[OUTCOME]]),
compute_cramerv(D_obs, df$sens_comb),
compute_chi2(D_obs, df$sens_comb),
compute_BF(D_obs, df$sens_comb),
compute_prog_freq(D_obs)
)
# Blackbox assignment
D_blackbox <- apply(df[VAR_POLSCORE_NAME], 1, which.max)
res["blackbox", ] <- c(
FALSE,
compute_policyvalue(D_blackbox, df[VAR_POLSCORE_NAME]),
compute_cramerv(D_blackbox, df$sens_comb),
compute_chi2(D_blackbox, df$sens_comb),
compute_BF(D_blackbox, df$sens_comb),
compute_prog_freq(D_blackbox - 1)
)
# Fair blackbox assignment
D_blackboxfair <- apply(df[paste0(VAR_POLSCORE_NAME,"_mq")], 1, which.max)
res["blackboxfair", ] <- c(
FALSE,
compute_policyvalue(D_blackboxfair, df[VAR_POLSCORE_NAME]),
compute_cramerv(D_blackboxfair, df$sens_comb),
compute_chi2(D_blackboxfair, df$sens_comb),
compute_BF(D_blackboxfair, df$sens_comb),
compute_prog_freq(D_blackboxfair - 1)
)
# All-in-one benchmark
best_program <- which.max(colMeans(df[VAR_POLSCORE_NAME]))
D_allin1 <- rep(best_program, nrow(df))
res["allin1", ] <- c(
TRUE,
compute_policyvalue(D_allin1, df[VAR_POLSCORE_NAME]),
0,
1,
log(0),
compute_prog_freq(D_allin1 - 1)
)
results_list[[set_name]] <- as.data.frame(res)
}
# Print results
knitr::kable(results_list[['training']], digits=3, caption="Training sample")
Interpr. | Policy value | CramerV | chi2 pval | log(BF) | 0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|---|---|---|---|---|
observed | 0 | 14.486 | 0.075 | 0 | 120.510 | 0.746 | 0.196 | 0.013 | 0.014 | 0.021 | 0.010 |
blackbox | 0 | 18.179 | 0.096 | 0 | 270.415 | 0.000 | 0.000 | 0.366 | 0.632 | 0.002 | 0.000 |
blackboxfair | 0 | 18.160 | 0.031 | 0 | -10.661 | 0.000 | 0.000 | 0.369 | 0.626 | 0.003 | 0.002 |
allin1 | 1 | 17.882 | 0.000 | 1 | -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
knitr::kable(results_list[['evaluation']], digits=3, caption="Evaluation sample")
Interpr. | Policy value | CramerV | chi2 pval | log(BF) | 0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|---|---|---|---|---|
observed | 0 | 14.528 | 0.065 | 0.000 | 18.118 | 0.745 | 0.197 | 0.013 | 0.014 | 0.021 | 0.009 |
blackbox | 0 | 18.174 | 0.097 | 0.000 | 116.579 | 0.000 | 0.000 | 0.369 | 0.629 | 0.002 | 0.000 |
blackboxfair | 0 | 18.152 | 0.028 | 0.001 | -27.173 | 0.000 | 0.000 | 0.375 | 0.620 | 0.004 | 0.001 |
allin1 | 1 | 17.873 | 0.000 | 1.000 | -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
Next, we assess policy tree assignments for different adjustment scenarios: - No adjustment (\(S\) included in decision-relevant variables) - No adjustment (\(S\) excluded from decision-relevant variables) - Adjustment of decision-relevant variables - Adjustment of scores - Adjustment of decision-relevant variables and scores Note that policies involving adjustments of the decision-relevant variables are no longer interpretable when using the standard policy trees.
# Determine tree parameters besides depth:
# split.step: To avoid lengthy computations, we determine a maximum of 100
# evaluation points for continuous variables. Hence, we set split.step = n/100
splitstep <- round(nrow(training_df)/100, 0)
# We set the minimum node size to 0.1*(n/depth):
minnodesize <- round(0.1*(nrow(training_df)/PT_DEPTH), 0)
# Determine tree type
if(PT_DEPTH == PT_SEARCH_DEPTH){
tree_FUN <- function(
X, Gamma, depth = PT_DEPTH, split.step = splitstep,
min.node.size = minnodesize, verbose = TRUE){
policy_tree(
X, Gamma, depth = depth, split.step = split.step,
min.node.size = min.node.size, verbose=verbose)
}
}else{
tree_FUN <- function(
X, Gamma, depth = PT_DEPTH, search.depth = PT_SEARCH_DEPTH,
split.step = splitstep, min.node.size = minnodesize, verbose = TRUE){
hybrid_policy_tree(
X, Gamma, depth = depth, search.depth = search.depth,
split.step = split.step, min.node.size = min.node.size, verbose=verbose)
}
}
# Run and evaluate policy trees for different adjustment scenarios
opt_tree_list <- list()
data_list <- list(
pt_unadj_inclS = list(
exp = TRUE, As = colnames(cbind(As_org, sens_org)), scores = colnames(scores_org)),
pt_unadj_exclS = list(
exp = TRUE, As = colnames(As_org), scores = colnames(scores_org)),
pt_adjust_A = list(
exp = FALSE, As = colnames(As_mq), scores = colnames(scores_org)),
pt_adjust_score = list(
exp = TRUE, As = colnames(As_org), scores = colnames(scores_mq)),
pt_adjust_A_score = list(
exp = FALSE, As = colnames(As_mq), scores = colnames(scores_mq)),
pt_adjust_Acdf = list(
exp = FALSE, As = colnames(As_cdf), scores = colnames(scores_org)),
pt_adjust_Acdf_score = list(
exp = FALSE, As = colnames(As_cdf), scores = colnames(scores_mq)))
for(set_name in names(data_list)) {
opt_tree <- tree_FUN(
training_fair_df[data_list[[set_name]]$As],
training_fair_df[data_list[[set_name]]$scores])
opt_tree_list[[set_name]] <- opt_tree
Dstar_opt <- predict(opt_tree, training_fair_df[data_list[[set_name]]$As])
Dstar_opt_out <- predict(opt_tree, evaluation_fair_df[data_list[[set_name]]$As])
results_list$training[set_name, ] <- c(
data_list[[set_name]]$exp,
compute_policyvalue(Dstar_opt, scores_org),
compute_cramerv(Dstar_opt, sens_comb_org$sens_comb),
compute_chi2(Dstar_opt, sens_comb_org$sens_comb),
compute_BF(Dstar_opt, sens_comb_org$sens_comb),
compute_prog_freq(Dstar_opt - 1))
results_list$evaluation[set_name, ] <- c(
data_list[[set_name]]$exp,
compute_policyvalue(Dstar_opt_out, scores_org_out),
compute_cramerv(Dstar_opt_out, sens_comb_org_out$sens_comb),
compute_chi2(Dstar_opt_out, sens_comb_org_out$sens_comb),
compute_BF(Dstar_opt_out, sens_comb_org_out$sens_comb),
compute_prog_freq(Dstar_opt_out - 1))
}
# Print results
knitr::kable(results_list[['training']], digits=3, caption="Training sample")
Interpr. | Policy value | CramerV | chi2 pval | log(BF) | 0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|---|---|---|---|---|
observed | 0 | 14.486 | 0.075 | 0 | 120.510 | 0.746 | 0.196 | 0.013 | 0.014 | 0.021 | 0.010 |
blackbox | 0 | 18.179 | 0.096 | 0 | 270.415 | 0.000 | 0.000 | 0.366 | 0.632 | 0.002 | 0.000 |
blackboxfair | 0 | 18.160 | 0.031 | 0 | -10.661 | 0.000 | 0.000 | 0.369 | 0.626 | 0.003 | 0.002 |
allin1 | 1 | 17.882 | 0.000 | 1 | -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
pt_unadj_inclS | 1 | 17.923 | 0.451 | 0 | 2305.498 | 0.000 | 0.000 | 0.199 | 0.801 | 0.000 | 0.000 |
pt_unadj_exclS | 1 | 17.896 | 0.251 | 0 | 725.833 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
pt_adjust_A | 0 | 17.897 | 0.083 | 0 | 67.227 | 0.000 | 0.000 | 0.130 | 0.870 | 0.000 | 0.000 |
pt_adjust_score | 1 | 17.893 | 0.248 | 0 | 730.254 | 0.000 | 0.000 | 0.149 | 0.851 | 0.000 | 0.000 |
pt_adjust_A_score | 0 | 17.894 | 0.088 | 0 | 78.424 | 0.000 | 0.000 | 0.187 | 0.813 | 0.000 | 0.000 |
pt_adjust_Acdf | 0 | 17.898 | 0.080 | 0 | 62.356 | 0.000 | 0.000 | 0.137 | 0.863 | 0.000 | 0.000 |
pt_adjust_Acdf_score | 0 | 17.894 | 0.080 | 0 | 63.007 | 0.000 | 0.000 | 0.196 | 0.804 | 0.000 | 0.000 |
knitr::kable(results_list[['evaluation']], digits=3, caption="Evaluation sample")
Interpr. | Policy value | CramerV | chi2 pval | log(BF) | 0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|---|---|---|---|---|
observed | 0 | 14.528 | 0.065 | 0.000 | 18.118 | 0.745 | 0.197 | 0.013 | 0.014 | 0.021 | 0.009 |
blackbox | 0 | 18.174 | 0.097 | 0.000 | 116.579 | 0.000 | 0.000 | 0.369 | 0.629 | 0.002 | 0.000 |
blackboxfair | 0 | 18.152 | 0.028 | 0.001 | -27.173 | 0.000 | 0.000 | 0.375 | 0.620 | 0.004 | 0.001 |
allin1 | 1 | 17.873 | 0.000 | 1.000 | -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
pt_unadj_inclS | 1 | 17.909 | 0.459 | 0.000 | 1198.717 | 0.000 | 0.000 | 0.201 | 0.799 | 0.000 | 0.000 |
pt_unadj_exclS | 1 | 17.882 | 0.266 | 0.000 | 394.799 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
pt_adjust_A | 0 | 17.883 | 0.089 | 0.000 | 35.379 | 0.000 | 0.000 | 0.124 | 0.876 | 0.000 | 0.000 |
pt_adjust_score | 1 | 17.880 | 0.248 | 0.000 | 359.030 | 0.000 | 0.000 | 0.145 | 0.855 | 0.000 | 0.000 |
pt_adjust_A_score | 0 | 17.883 | 0.090 | 0.000 | 36.355 | 0.000 | 0.000 | 0.181 | 0.819 | 0.000 | 0.000 |
pt_adjust_Acdf | 0 | 17.884 | 0.088 | 0.000 | 34.569 | 0.000 | 0.000 | 0.131 | 0.869 | 0.000 | 0.000 |
pt_adjust_Acdf_score | 0 | 17.881 | 0.084 | 0.000 | 30.038 | 0.000 | 0.000 | 0.194 | 0.806 | 0.000 | 0.000 |
# Plot the trees
lab <- mappings[!is.na(mappings[VAR_D_NAME]), 'X']
plot(opt_tree_list[['pt_unadj_inclS']], leaf.labels=lab)
plot(opt_tree_list[['pt_unadj_exclS']], leaf.labels=lab)
plot(opt_tree_list[['pt_adjust_A']], leaf.labels=lab)
plot(opt_tree_list[['pt_adjust_score']], leaf.labels=lab)
plot(opt_tree_list[['pt_adjust_A_score']], leaf.labels=lab)
To regain interpretability of policy trees with adjusted decision-relevant variables, we can fit probabilisty split trees. We obtain one tree per sensitive group. In these trees there may occur probabilistic splits, meaning that individuals at the splitting threshold may proceed in both child nodes with a certain probability. These probabilistic splits are necessary to break the dependence of the resulting assignments with the sensitive attributes.
# Fit probabilistic split tree
data_list2 <- list(
pst_adjust_A = list(exp = TRUE, adjust_scores = FALSE),
pst_adjust_A_score = list(exp = TRUE, adjust_scores = TRUE))
for(set_name in names(data_list2)) {
opt_tree <- prob_split_tree(
As_org,
scores_org,
sens_org,
adjust_scores=data_list2[[set_name]]$adjust_scores,
seed=SEED,
ties.method=TIES_METHOD,
depth=PT_DEPTH,
search.depth=PT_SEARCH_DEPTH,
split.step=splitstep,
min.node.size=minnodesize)
opt_tree_list[[set_name]] <- opt_tree
Dstar_opt <- predict(opt_tree, As_org, sens_org, seed=SEED)
Dstar_opt_out <- predict(opt_tree, As_org_out, sens_org_out, seed=SEED)
results_list$training[set_name, ] <- c(
data_list2[[set_name]]$exp,
compute_policyvalue(Dstar_opt, scores_org),
compute_cramerv(Dstar_opt, sens_comb_org$sens_comb),
compute_chi2(Dstar_opt, sens_comb_org$sens_comb),
compute_BF(Dstar_opt, sens_comb_org$sens_comb),
compute_prog_freq(Dstar_opt - 1))
results_list$evaluation[set_name, ] <- c(
data_list2[[set_name]]$exp,
compute_policyvalue(Dstar_opt_out, scores_org_out),
compute_cramerv(Dstar_opt_out, sens_comb_org_out$sens_comb),
compute_chi2(Dstar_opt_out, sens_comb_org_out$sens_comb),
compute_BF(Dstar_opt_out, sens_comb_org_out$sens_comb),
compute_prog_freq(Dstar_opt_out - 1))
}
# Print results
knitr::kable(results_list[['training']], digits=3, caption="Training sample")
Interpr. | Policy value | CramerV | chi2 pval | log(BF) | 0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|---|---|---|---|---|
observed | 0 | 14.486 | 0.075 | 0 | 120.510 | 0.746 | 0.196 | 0.013 | 0.014 | 0.021 | 0.010 |
blackbox | 0 | 18.179 | 0.096 | 0 | 270.415 | 0.000 | 0.000 | 0.366 | 0.632 | 0.002 | 0.000 |
blackboxfair | 0 | 18.160 | 0.031 | 0 | -10.661 | 0.000 | 0.000 | 0.369 | 0.626 | 0.003 | 0.002 |
allin1 | 1 | 17.882 | 0.000 | 1 | -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
pt_unadj_inclS | 1 | 17.923 | 0.451 | 0 | 2305.498 | 0.000 | 0.000 | 0.199 | 0.801 | 0.000 | 0.000 |
pt_unadj_exclS | 1 | 17.896 | 0.251 | 0 | 725.833 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
pt_adjust_A | 0 | 17.897 | 0.083 | 0 | 67.227 | 0.000 | 0.000 | 0.130 | 0.870 | 0.000 | 0.000 |
pt_adjust_score | 1 | 17.893 | 0.248 | 0 | 730.254 | 0.000 | 0.000 | 0.149 | 0.851 | 0.000 | 0.000 |
pt_adjust_A_score | 0 | 17.894 | 0.088 | 0 | 78.424 | 0.000 | 0.000 | 0.187 | 0.813 | 0.000 | 0.000 |
pt_adjust_Acdf | 0 | 17.898 | 0.080 | 0 | 62.356 | 0.000 | 0.000 | 0.137 | 0.863 | 0.000 | 0.000 |
pt_adjust_Acdf_score | 0 | 17.894 | 0.080 | 0 | 63.007 | 0.000 | 0.000 | 0.196 | 0.804 | 0.000 | 0.000 |
pst_adjust_A | 1 | 17.896 | 0.081 | 0 | 63.422 | 0.000 | 0.000 | 0.136 | 0.864 | 0.000 | 0.000 |
pst_adjust_A_score | 1 | 17.893 | 0.079 | 0 | 61.521 | 0.000 | 0.000 | 0.196 | 0.804 | 0.000 | 0.000 |
knitr::kable(results_list[['evaluation']], digits=3, caption="Evaluation sample")
Interpr. | Policy value | CramerV | chi2 pval | log(BF) | 0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|---|---|---|---|---|
observed | 0 | 14.528 | 0.065 | 0.000 | 18.118 | 0.745 | 0.197 | 0.013 | 0.014 | 0.021 | 0.009 |
blackbox | 0 | 18.174 | 0.097 | 0.000 | 116.579 | 0.000 | 0.000 | 0.369 | 0.629 | 0.002 | 0.000 |
blackboxfair | 0 | 18.152 | 0.028 | 0.001 | -27.173 | 0.000 | 0.000 | 0.375 | 0.620 | 0.004 | 0.001 |
allin1 | 1 | 17.873 | 0.000 | 1.000 | -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
pt_unadj_inclS | 1 | 17.909 | 0.459 | 0.000 | 1198.717 | 0.000 | 0.000 | 0.201 | 0.799 | 0.000 | 0.000 |
pt_unadj_exclS | 1 | 17.882 | 0.266 | 0.000 | 394.799 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
pt_adjust_A | 0 | 17.883 | 0.089 | 0.000 | 35.379 | 0.000 | 0.000 | 0.124 | 0.876 | 0.000 | 0.000 |
pt_adjust_score | 1 | 17.880 | 0.248 | 0.000 | 359.030 | 0.000 | 0.000 | 0.145 | 0.855 | 0.000 | 0.000 |
pt_adjust_A_score | 0 | 17.883 | 0.090 | 0.000 | 36.355 | 0.000 | 0.000 | 0.181 | 0.819 | 0.000 | 0.000 |
pt_adjust_Acdf | 0 | 17.884 | 0.088 | 0.000 | 34.569 | 0.000 | 0.000 | 0.131 | 0.869 | 0.000 | 0.000 |
pt_adjust_Acdf_score | 0 | 17.881 | 0.084 | 0.000 | 30.038 | 0.000 | 0.000 | 0.194 | 0.806 | 0.000 | 0.000 |
pst_adjust_A | 1 | 17.884 | 0.087 | 0.000 | 33.111 | 0.000 | 0.000 | 0.130 | 0.870 | 0.000 | 0.000 |
pst_adjust_A_score | 1 | 17.882 | 0.077 | 0.000 | 23.446 | 0.000 | 0.000 | 0.192 | 0.808 | 0.000 | 0.000 |
# Plot the trees
plot(opt_tree_list[['pst_adjust_A']], sens_names=VAR_S_NAME_ORD, leaf.labels=lab)
plot(opt_tree_list[['pst_adjust_A_score']], sens_names=VAR_S_NAME_ORD, leaf.labels=lab)
# Combine to one tree by manually adjusting dot string
dot_string_total <- ""
j <- 0
for(i in names(opt_tree_list[['pst_adjust_A_score']])){
j <- j + 1
dot_string2 <- plot(opt_tree_list[['pst_adjust_A_score']][[i]], leaf.labels=lab)[["x"]][["diagram"]]
dot_string2 <- gsub("\n(\\d)", paste0("\n",j,"\\1"), dot_string2)
dot_string2 <- gsub("-> (\\d)", paste0("-> ",j,"\\1"), dot_string2)
dot_string2 <- sub("digraph nodes { \n node [shape=box] ;\n", "", dot_string2, fixed = TRUE)
dot_string2 <- sub("\n}", "", dot_string2, fixed = TRUE)
dot_string2 <- gsub(
"[labeldistance=2.5, labelangle=45, headlabel=\"True\"]", "", dot_string2, fixed = TRUE)
dot_string2 <- gsub(
"[labeldistance=2.5, labelangle=-45, headlabel=\"False\"]", "", dot_string2, fixed = TRUE)
dot_string_total <- paste0(dot_string_total, dot_string2)
}
dot_string <- paste0("
digraph combined {
rankdir=LR;
node [shape=box];
top [label=\"female = 1\", shape=ellipse, style=filled, fillcolor=lightblue];
top2 [label=\"swiss = 1\", shape=ellipse, style=filled, fillcolor=lightblue];
top3 [label=\"swiss = 1\", shape=ellipse, style=filled, fillcolor=lightblue];
", dot_string_total, "
top -> top2 [labeldistance=3, labelangle=-30, headlabel='True'];
top -> top3 [labeldistance=3, labelangle=30, headlabel='False'];
top2 -> ", 10, ";
top2 -> ", 20, ";
top3 -> ", 30, ";
top3 -> ", 40, ";
}
")
dot_string <- gsub("qual_degree", "degree", dot_string, fixed = TRUE)
dot_string <- gsub("past_income", "past earnings", dot_string, fixed = TRUE)
DiagrammeR::grViz(dot_string)
One may only partially adjust the variables to further analyse the trade-off between policy value and fairness. This is achieved by a linear combination of adjusted and original variables for different weights between 0 and 1. The steps are as follows:
# Define weights
weights <- seq(0,1,0.1)
inv_weights <- 1 - weights
As_weight <- mapply(
function(w, iw) round(As_mq * w + As_org * iw, 0), weights, inv_weights, SIMPLIFY = FALSE)
As_weight_out <- mapply(
function(w, iw) round(As_mq_out * w + As_org_out * iw, 0), weights, inv_weights, SIMPLIFY = FALSE)
scores_weight <- mapply(
function(w, iw) scores_mq * w + scores_org * iw, weights, inv_weights, SIMPLIFY = FALSE)
As_org_list <- replicate(length(weights), As_org, simplify = FALSE)
As_org_out_list <- replicate(length(weights), As_org_out, simplify = FALSE)
scores_org_list <- replicate(length(weights), scores_org, simplify = FALSE)
# Setup scenarios
scenarios <- list(
clean_As = list(As = As_weight, scores = scores_org_list, As_out = As_weight_out),
clean_Scores = list(As = As_org_list, scores = scores_weight, As_out = As_org_out_list),
clean_Scores_As = list(As = As_weight, scores = scores_weight, As_out = As_weight_out)
)
# Compute policy trees and policy value/fairness measures
rows <- list()
opt_tree_weight <- list()
for(scen in names(scenarios)){
As_list <- scenarios[[scen]]$As
Scores_list <- scenarios[[scen]]$scores
As_out_list <- scenarios[[scen]]$As_out
opt_tree_weight[[scen]] <- list()
for(i in seq_along(weights)){
opt_tree_weight[[scen]][[i]] <- tree_FUN(As_list[[i]], Scores_list[[i]])
Dstar <- predict(opt_tree_weight[[scen]][[i]], As_out_list[[i]])
rows[[length(rows) + 1]] <- data.frame(
Weight = weights[i],
Metric='Policy value',
Scenario=scen,
Value=compute_policyvalue(Dstar, scores_org_out))
rows[[(length(rows) + 1)]] <- data.frame(
Weight = weights[i],
Metric='CramerV',
Scenario=scen,
Value=compute_cramerv(Dstar, sens_comb_org_out$sens_comb)[[1]])
}
}
results_partial <- do.call(rbind, rows)
range_policyvalue <- results_partial %>% filter(Metric == "Policy value") %>%
summarise(min_val = floor(min(Value) * 10) / 10, max_val = ceiling(
max(Value) * 10) / 10)
range_CramerV <- results_partial %>% filter(Metric == "CramerV") %>%
summarise(min_val = floor(min(Value) * 10) / 10, max_val = ceiling(
max(Value) * 10) / 10)
# Plot
# Plot
p1 <- ggplot(results_partial[results_partial$Metric == "Policy value", ], aes(
x = Weight, y = Value, color = Scenario, linetype= Scenario)) +
geom_line(linewidth=0.7) +
labs(
title = "Policy Value", x = "Weight of adjusted variable",
color = "Scenario:", linetype= "Scenario:") +
ylim(range_policyvalue$min_val, range_policyvalue$max_val) +
theme_minimal() +
scale_color_discrete(labels = c(
expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
expression(paste("Adjust ", A, " and ", Gamma["d"])))) +
scale_linetype_discrete(labels = c(
expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
expression(paste("Adjust ", A, " and ", Gamma["d"]))))
p2 <- ggplot(results_partial[results_partial$Metric == "CramerV", ], aes(
x = Weight, y = Value, color = Scenario, linetype= Scenario)) +
geom_line(linewidth=0.7) +
labs(
title = "Fairness (Cramer's V)", x = "Weight of adjusted variable",
color = "Scenario:", linetype= "Scenario:") +
ylim(0, range_CramerV$max_val) +
theme_minimal() +
scale_color_discrete(labels = c(
expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
expression(paste("Adjust ", A, " and ", Gamma["d"])))) +
scale_linetype_discrete(labels = c(
expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
expression(paste("Adjust ", A, " and ", Gamma["d"]))))
plot_combined <- (p1 + p2) + plot_layout(guides = "collect")
plot_combined
Finally, we can assess the winners and losers of the probabilistic tree assignments compared to unadjusted policy tree assignments. The “winners” and “loser” groups are determined by kmeans clustering, with the number of groups determined by the Silhouette score with the restriction of a minimum cluster size of 1% of the observations.
# Prepare data
individual_policyvalue <- function(Dstar, scores) {
scores[cbind(seq_along(Dstar), Dstar)]
}
data_df <- cbind(
evaluation_df[, columns$X_ord[columns$X_ord > 0]],
fastDummies::dummy_cols(
data.frame(lapply(evaluation_df[, columns$X_unord[columns$X_unord > 0]], as.character)),
remove_selected_columns = TRUE
),
policyvalue_adjusted = individual_policyvalue(
predict(tree = opt_tree_list[["pst_adjust_A_score"]], A = As_org_out, sens = sens_org_out),
scores_org_out
),
policyvalue_unadjusted = individual_policyvalue(
predict(opt_tree_list[['pt_unadj_exclS']], As_org_out),
scores_org_out
)
)
# Compute the difference between actual and reference policy value.
data_df$policyvalue_diff <- data_df$policyvalue_adjusted - data_df$policyvalue_unadjusted
policyvalue_diff_matrix <- matrix(data_df$policyvalue_diff, ncol = 1)
# Determine optimal number of clusters using Silhouette score
dist_matrix <- stats::dist(policyvalue_diff_matrix)
min_cluster_size <- ceiling(nrow(data_df) / 100)
sil_width <- sapply(2:8, function(k){
km <- kmeans(policyvalue_diff_matrix, centers = k, nstart = 10)
ss <- silhouette(km$cluster, dist_matrix)
if(min(km$size) > min_cluster_size){
mean(ss[, 3])
}else{
0
}
})
n_clusters <- which.max(sil_width) + 1
# Get clusters ordered by cluster mean
km <- kmeans(policyvalue_diff_matrix, centers = n_clusters, nstart = 10)
ordered_clusters <- order(km$centers)
cluster_map <- setNames(seq_along(ordered_clusters), ordered_clusters)
data_df$cluster_ids <- cluster_map[as.character(km$cluster)]
# Get variables means
df_grouped_means <- data_df %>%
group_by(cluster_ids) %>%
summarise(n = n(), across(where(is.numeric),\(x) mean(x, na.rm = TRUE))) %>%
pivot_longer(cols = -cluster_ids, names_to = "variable", values_to = "mean") %>%
pivot_wider(names_from = cluster_ids, values_from = mean) %>%
arrange(factor(variable, levels = c("policyvalue_diff", setdiff(variable, "policyvalue_diff")))) %>%
mutate(across(where(is.numeric), round, 2))
# Generale meaningful cluster labels
generate_cluster_labels <- function(means) {
labels <- character(length(means))
gains <- means > 0
losses <- means < 0
zeros <- means == 0
intensity_labels <- c("Strong", "Moderate", "Mild", "Slight")
# Gains
if(sum(gains) > 0){
ranks <- rank(-means[gains], ties.method = "first")
intensity <- if(sum(gains) > 1) paste(intensity_labels[seq_along(ranks)], "Gain") else "Gain"
labels[gains] <- intensity[order(ranks)]
}
# Losses
if(sum(losses) > 0){
ranks <- rank(means[losses], ties.method = "first")
intensity <- if(sum(losses) > 1) paste(intensity_labels[seq_along(ranks)], "Loss") else "Loss"
labels[losses] <- intensity[order(ranks)]
}
# No change
labels[zeros] <- "No Change"
return(labels)
}
colnames(df_grouped_means) <- c("Variable", generate_cluster_labels(
round(as.numeric(subset(df_grouped_means, variable == 'policyvalue_diff')[, -1]),2)))
# Print
knitr::kable(
df_grouped_means,
caption="Covariate means of k-means clusters")
Variable | Strong Loss | Moderate Loss | No Change | Moderate Gain | Strong Gain |
---|---|---|---|---|---|
policyvalue_diff | -2.16 | -1.00 | 0.00 | 0.93 | 1.97 |
n | 185.00 | 392.00 | 10688.00 | 399.00 | 196.00 |
female | 0.54 | 0.58 | 0.43 | 0.52 | 0.58 |
swiss | 0.79 | 0.59 | 0.64 | 0.65 | 0.73 |
age | 42.41 | 42.32 | 36.00 | 42.18 | 42.57 |
canton_moth_tongue | 0.03 | 0.09 | 0.11 | 0.09 | 0.07 |
cw_age | 42.74 | 43.20 | 44.08 | 46.03 | 47.41 |
cw_cooperative | 0.59 | 0.51 | 0.49 | 0.39 | 0.32 |
cw_educ_above_voc | 0.33 | 0.44 | 0.46 | 0.50 | 0.58 |
cw_educ_tertiary | 0.38 | 0.19 | 0.20 | 0.13 | 0.11 |
cw_female | 0.54 | 0.49 | 0.44 | 0.43 | 0.35 |
cw_missing | 0.05 | 0.05 | 0.05 | 0.04 | 0.08 |
cw_own_ue | 0.70 | 0.59 | 0.62 | 0.64 | 0.71 |
cw_tenure | 4.61 | 5.33 | 5.53 | 5.97 | 6.13 |
cw_voc_degree | 0.24 | 0.28 | 0.26 | 0.26 | 0.14 |
emp_share_last_2yrs | 0.72 | 0.76 | 0.81 | 0.78 | 0.81 |
emp_spells_5yrs | 1.23 | 1.20 | 1.19 | 1.11 | 0.93 |
employability | 1.85 | 1.84 | 1.93 | 1.75 | 1.84 |
foreigner_b | 0.06 | 0.16 | 0.13 | 0.12 | 0.07 |
foreigner_c | 0.14 | 0.25 | 0.23 | 0.23 | 0.19 |
gdp_pc | 0.59 | 0.55 | 0.52 | 0.48 | 0.48 |
married | 0.50 | 0.61 | 0.46 | 0.63 | 0.64 |
other_mother_tongue | 0.15 | 0.42 | 0.33 | 0.40 | 0.34 |
past_income | 25671.35 | 27755.66 | 43964.99 | 27246.47 | 28049.13 |
ue_cw_allocation1 | 0.68 | 0.58 | 0.59 | 0.54 | 0.47 |
ue_cw_allocation2 | 0.58 | 0.53 | 0.51 | 0.42 | 0.31 |
ue_cw_allocation3 | 0.06 | 0.05 | 0.04 | 0.05 | 0.02 |
ue_cw_allocation4 | 0.06 | 0.09 | 0.08 | 0.10 | 0.05 |
ue_cw_allocation5 | 0.08 | 0.16 | 0.12 | 0.13 | 0.16 |
ue_cw_allocation6 | 0.10 | 0.11 | 0.09 | 0.10 | 0.07 |
ue_spells_last_2yrs | 0.62 | 0.72 | 0.56 | 0.62 | 0.43 |
unemp_rate | 4.25 | 3.72 | 3.51 | 3.19 | 3.20 |
qual_degree | 0.62 | 0.42 | 0.58 | 0.47 | 0.43 |
city_1 | 0.56 | 0.65 | 0.68 | 0.75 | 0.77 |
city_2 | 0.14 | 0.11 | 0.13 | 0.14 | 0.14 |
city_3 | 0.30 | 0.24 | 0.20 | 0.12 | 0.10 |
prev_job_sec_cat_0 | 0.66 | 0.56 | 0.61 | 0.59 | 0.61 |
prev_job_sec_cat_1 | 0.07 | 0.12 | 0.14 | 0.13 | 0.12 |
prev_job_sec_cat_2 | 0.24 | 0.26 | 0.17 | 0.20 | 0.20 |
prev_job_sec_cat_3 | 0.03 | 0.06 | 0.09 | 0.08 | 0.07 |
prev_job_0 | 0.66 | 0.50 | 0.61 | 0.45 | 0.52 |
prev_job_1 | 0.04 | 0.04 | 0.07 | 0.03 | 0.05 |
prev_job_2 | 0.24 | 0.44 | 0.28 | 0.50 | 0.41 |
prev_job_3 | 0.06 | 0.02 | 0.03 | 0.02 | 0.02 |
qual_0 | 0.62 | 0.42 | 0.58 | 0.47 | 0.43 |
qual_1 | 0.09 | 0.18 | 0.16 | 0.20 | 0.32 |
qual_2 | 0.26 | 0.36 | 0.23 | 0.30 | 0.21 |
qual_3 | 0.02 | 0.03 | 0.04 | 0.03 | 0.04 |
policyvalue_adjusted | 15.19 | 14.39 | 18.16 | 15.61 | 16.88 |
policyvalue_unadjusted | 17.34 | 15.39 | 18.16 | 14.68 | 14.91 |