This notebook provides a condensed replication of the empirical results of the paper “Fairness-Aware and Interpretable Policy Learning” by Nora Bearth, Michael Lechner, Jana Mareckova, and Fabian Muny.

1 Preparations

The functions used for the analysis are provided by the fairpolicytree package, which can be installed from GitHub using install_github("fmuny/fairpolicytree"). It is an extension of the policytree package (Sverdrup, Kanodia, Zhou, Athey, and Wager, 2020). To start, load the required packages and define the basic parameters.
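For example, assuming the remotes package is available (devtools::install_github works analogously), the installation step looks as follows:

# One-time installation of fairpolicytree from GitHub
# (assumes the remotes package is installed)
remotes::install_github("fmuny/fairpolicytree")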

# Packages
library(fairpolicytree)
library(policytree)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(rcompanion)
library(BayesFactor)
library(patchwork)
library(stringr)
library(knitr)
library(kableExtra)
library(cluster)

# Data source path
DATA_PATH <- "Q:/SEW/Projekte/NFP77/BeLeMaMu.Fairness/Data_fair/cleaned"
SET_STR <- "05_application"
OUTCOME <- "outcome0131"

# Method for ties in fairness adjustments
# One of "average", "first", "last", "random", "max", "min"
# see https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/rank
TIES_METHOD <- "random"

# Depth of policy trees
PT_DEPTH <- 3
PT_SEARCH_DEPTH <- PT_DEPTH
# If PT_SEARCH_DEPTH = PT_DEPTH then an exact tree is built.
# If PT_SEARCH_DEPTH < PT_DEPTH then a hybrid tree is built.
# See https://grf-labs.github.io/policytree/reference/hybrid_policy_tree.html

# Set seed for replicability
SEED <- 12345
set.seed(SEED)

The analysis is based on the Swiss Active Labor Market Policy Evaluation Dataset (administrative data), which can be downloaded from https://doi.org/10.23662/FORS-DS-1203-1. Both the preparation of the raw data and the estimation of the scores with the Modified Causal Forest (MCF; Lechner, 2018) were carried out in the file data_cleaning.py.

# Load and prepare data (add original outcome): Training sample
outcomes_pr <- read.csv(file.path(DATA_PATH, "data_clean_pr.csv"))
training_df <- read.csv(file.path(paste0(DATA_PATH, "/", SET_STR, "_", OUTCOME), "iates_pr.csv"))
outcomes_pr <- outcomes_pr[OUTCOME] %>% slice(training_df$id_mcf+1)
training_df <- cbind(training_df, outcomes_pr)

# Evaluation sample
outcomes_ev <- read.csv(file.path(DATA_PATH, "data_clean_ev.csv"))
evaluation_df <- read.csv(file.path(paste0(DATA_PATH, "/", SET_STR, "_", OUTCOME), "iates_ev.csv"))
outcomes_ev <- outcomes_ev[OUTCOME] %>% slice(evaluation_df$id_mcf+1)
evaluation_df <- cbind(evaluation_df, outcomes_ev)

# Create combined sensitive attribute
training_df$sens_comb <- factor(apply(
  training_df[c("female", "swiss")], 1, function(x) paste(x, collapse = "_")))
evaluation_df$sens_comb <- factor(apply(
  evaluation_df[c("female", "swiss")], 1, function(x) paste(x, collapse = "_")))

# Load variable lists and mappings
columns <- read.csv(file.path(DATA_PATH, "columns.csv"))
mappings <- read.csv(file.path(DATA_PATH, "mappings.csv"))

# Define variables
VAR_S_NAME_ORD <- columns$S_ord[columns$S_ord > 0]
VAR_A_NAME_ORD <- columns$A_ord[columns$A_ord > 0]
VAR_ID_NAME <- "id_mcf"
VAR_D_NAME <- columns$D[columns$D > 0]
unique_D <- sort(unique(training_df[[VAR_D_NAME]]))
unique_D_ch <- as.character(sort(unique(training_df[[VAR_D_NAME]])))
nunique_D <- length(unique_D)
VAR_POLSCORE_NAME <- sapply(0:(nunique_D-1), function(x) paste0(
  OUTCOME, "_lc", x, "_un_lc_pot_eff"))

To get a first overview of the data, we plot the distributions of the variables of interest.

df_plot <- training_df %>% 
  select(all_of(c(VAR_POLSCORE_NAME, VAR_A_NAME_ORD, VAR_S_NAME_ORD))) %>%
  pivot_longer(
    cols = all_of(c(VAR_POLSCORE_NAME, VAR_A_NAME_ORD, VAR_S_NAME_ORD)), 
    names_to = "variable", 
    values_to = "value")
ggplot(
  df_plot, aes(x = value)) +
  geom_histogram(aes(y = after_stat(density)), colour = 'gray', bins=32, fill = "white") +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  facet_wrap(~variable, scales='free')

2 MQ-Adjustment

Next, the MQ-adjustment is applied to both the decision-relevant variables and the scores. As the tables and plots below show, the MQ-adjustment approximately preserves the moments of the original variables while reducing the differences between the sensitive groups.
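To convey the idea, the following minimal sketch shows one way such an adjustment can be constructed; it reflects our reading of the method and is not the mq_adjustment() implementation. Each variable is first mapped to its within-group empirical CDF value (vars_cdf), which is then passed through the pooled quantile function (vars_mq), so that all sensitive groups share a common marginal distribution.

# Illustrative sketch only, not the mq_adjustment() implementation:
# rank within each sensitive group, then map the group-specific CDF
# value through the pooled (marginal) quantile function.
mq_sketch <- function(x, group, ties.method = "random", quantile.type = 4) {
  cdf <- ave(x, group, FUN = function(v) rank(v, ties.method = ties.method) / length(v))
  mq <- quantile(x, probs = cdf, type = quantile.type, names = FALSE)
  data.frame(vars_cdf = cdf, vars_mq = mq)
}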

# Run MQ adjustments
datasets <- list(training = training_df, evaluation = evaluation_df)
vars_list <- list(scores = VAR_POLSCORE_NAME, As = VAR_A_NAME_ORD)
scores_adjusted <- list()
for (data_name in names(datasets)) {
  df <- datasets[[data_name]]
  scores_adjusted[[data_name]] <- list()
  for (vars in names(vars_list)) {
    cols <- vars_list[[vars]]
    scores_adjusted[[data_name]][[vars]] <- mq_adjustment(
      vars = df[cols],
      sens = df[VAR_S_NAME_ORD],
      seed = SEED,
      ties.method = TIES_METHOD,
      quantile.type = 4
    )
    if(vars=="As"){
      scores_adjusted[[data_name]][[vars]]$vars_mq <- round(
        scores_adjusted[[data_name]][[vars]]$vars_mq)
    }
  }
}
# Extract results
scores_org <- training_df[VAR_POLSCORE_NAME]
scores_cdf <- scores_adjusted$training$scores$vars_cdf
scores_mq <- scores_adjusted$training$scores$vars_mq
As_org <- training_df[VAR_A_NAME_ORD]
As_cdf <- scores_adjusted$training$As$vars_cdf
As_mq <- scores_adjusted$training$As$vars_mq
sens_org <- training_df[VAR_S_NAME_ORD]
sens_comb_org <- training_df['sens_comb']

scores_org_out <- evaluation_df[VAR_POLSCORE_NAME]
scores_cdf_out <- scores_adjusted$evaluation$scores$vars_cdf
scores_mq_out <- scores_adjusted$evaluation$scores$vars_mq
As_org_out <- evaluation_df[VAR_A_NAME_ORD]
As_cdf_out <- scores_adjusted$evaluation$As$vars_cdf
As_mq_out <- scores_adjusted$evaluation$As$vars_mq
sens_org_out <- evaluation_df[VAR_S_NAME_ORD]
sens_comb_org_out <- evaluation_df['sens_comb']

# Put into one df
training_fair_df <- cbind(training_df, scores_cdf, scores_mq, As_cdf, As_mq)
evaluation_fair_df <- cbind(evaluation_df, scores_cdf_out, scores_mq_out, As_cdf_out, As_mq_out)

# Print means and standard deviations
table_scores <- data.frame(
  scores_mean = colMeans(training_df[VAR_POLSCORE_NAME]),
  scores_cdf_mean = colMeans(scores_cdf),
  scores_mq_mean = colMeans(scores_mq),
  scores_std = sqrt(diag(var(training_df[VAR_POLSCORE_NAME]))),
  scores_cdf_std = sqrt(diag(var(scores_cdf))),
  scores_mq_std = sqrt(diag(var(scores_mq))))
knitr::kable(
  table_scores , digits=2,
  caption="Means and standard deviations of scores before and after adjustment")
Means and standard deviations of scores before and after adjustment
|                               | scores_mean | scores_cdf_mean | scores_mq_mean | scores_std | scores_cdf_std | scores_mq_std |
|:------------------------------|------------:|----------------:|---------------:|-----------:|---------------:|--------------:|
| outcome0131_lc0_un_lc_pot_eff |       14.59 |             0.5 |          14.59 |       4.47 |           0.29 |          4.47 |
| outcome0131_lc1_un_lc_pot_eff |       13.76 |             0.5 |          13.76 |       4.24 |           0.29 |          4.24 |
| outcome0131_lc2_un_lc_pot_eff |       17.34 |             0.5 |          17.34 |       4.60 |           0.29 |          4.61 |
| outcome0131_lc3_un_lc_pot_eff |       17.88 |             0.5 |          17.88 |       4.90 |           0.29 |          4.90 |
| outcome0131_lc4_un_lc_pot_eff |       15.79 |             0.5 |          15.79 |       4.99 |           0.29 |          4.99 |
| outcome0131_lc5_un_lc_pot_eff |       14.68 |             0.5 |          14.67 |       4.53 |           0.29 |          4.53 |
table_A <- data.frame(
    As_mean = colMeans(training_df[VAR_A_NAME_ORD]),
    As_cdf_mean = colMeans(As_cdf),
    As_mq_mean = colMeans(As_mq),
    As_std = sqrt(diag(var(training_df[VAR_A_NAME_ORD]))),
    As_cdf_std = sqrt(diag(var(As_cdf))),
    As_mq_std = sqrt(diag(var(As_mq))))
knitr::kable(
  table_A, digits=2,
  caption="Means and standard deviations of decision-relevant variables before and after adjustment")
Means and standard deviations of decision-relevant variables before and after adjustment
|             |  As_mean | As_cdf_mean | As_mq_mean |   As_std | As_cdf_std | As_mq_std |
|:------------|---------:|------------:|-----------:|---------:|-----------:|----------:|
| age         |    36.72 |         0.5 |      36.72 |     8.73 |       0.29 |      8.73 |
| past_income | 42352.23 |         0.5 |   42350.78 | 20364.41 |       0.29 |  20370.66 |
| qual_degree |     0.57 |         0.5 |       0.57 |     0.49 |       0.29 |      0.49 |
# Plot distributions of scores and A for combinations of S
vars_to_plot <- c(VAR_POLSCORE_NAME, VAR_A_NAME_ORD)
vars_to_plot <- c(vars_to_plot, paste0(vars_to_plot, "_mq"))
vars_to_plot_sens <- c(vars_to_plot, 'sens_comb')

map_prog <- setNames(
  as.character(mappings[!is.na(mappings[VAR_D_NAME]),'X']),
  as.character(mappings[!is.na(mappings[VAR_D_NAME]), VAR_D_NAME]))

df_fullplot <- training_fair_df[, vars_to_plot_sens] %>% pivot_longer(all_of(vars_to_plot))
df_fullplot <- df_fullplot %>% mutate(
  type = if_else(str_ends(name, "_mq"), "Adjusted variable", "Original variable"),
  name = str_remove(name, "_mq"),
  program_number = str_match(name, paste0(OUTCOME, "_lc([0-9]+)_un_lc_pot_eff"))[, 2],
  name = if_else(!is.na(program_number), paste0("Score (", map_prog[program_number], ")"), name),
  name = if_else(name == "age", "Age", name),
  name = if_else(name == "past_income", "Past earnings", name),
  name = if_else(name == "qual_degree", "Degree", name)
)

df_fullplot <- df_fullplot %>% mutate(name_type = paste0(name, "\n(", type, ")"))
df_fullplot$type <- factor(df_fullplot$type, levels = c("Original variable", "Adjusted variable"))
levs <- c(
  sort(unique(df_fullplot$name_type[str_ends(df_fullplot$name_type, "\\(Original variable\\)")])),
  sort(unique(df_fullplot$name_type[str_ends(df_fullplot$name_type, "\\(Adjusted variable\\)")]))
)
df_fullplot$name_type <- factor(df_fullplot$name_type, levels = levs)

df_fullplot_main <- df_fullplot[is.na(df_fullplot$program_number) | df_fullplot$program_number == 0,]
df_fullplot_appendix <- df_fullplot[!is.na(df_fullplot$program_number) & df_fullplot$program_number > 0,]

histograms_cleaning <- ggplot(df_fullplot_main, aes(x = value, fill = sens_comb)) +
  geom_histogram(aes(y = after_stat(density)), alpha = 0.5, position = "identity", bins = 32) +
  labs(x=NULL, y = "Density", fill="Sensitive attribute:") +
  theme_minimal() +
  facet_wrap(~name_type, scales = "free", ncol = 4) +
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  scale_fill_discrete(
    labels = c("0_0" = "Foreign men", "1_0" = "Foreign women", "0_1" = "Swiss men", "1_1" = "Swiss women")) +
  theme(legend.position = "bottom", text = element_text(size = 10))
plot(histograms_cleaning)

histograms_cleaning_appendix <- ggplot(df_fullplot_appendix, aes(x = value, fill = sens_comb)) +
  geom_histogram(aes(y = after_stat(density)), alpha = 0.5, position = "identity", bins = 32) +
  labs(x=NULL, y = "Density", fill="Sensitive attribute:") +
  theme_minimal() +
  facet_wrap(~name_type, scales = "free", ncol = 5) +
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  scale_fill_discrete(
    labels = c("0_0" = "Foreign men", "1_0" = "Foreign women", "0_1" = "Swiss men", "1_1" = "Swiss women")) +
  theme(legend.position = "bottom", text = element_text(size = 10))
plot(histograms_cleaning_appendix)

3 Benchmark Policies

The analysis of the trade-off between policy value, interpretability, and fairness starts with several benchmark policies. These include the observed assignments and a blackbox assignment to the program with the largest score. Using the MQ-adjusted policy scores instead yields a fairness-aware blackbox assignment. In addition, we obtain a completely fair benchmark (according to our definition) that assigns all individuals to the same treatment.
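Formally, writing \(\hat{\Gamma}_{d,i}\) for the estimated policy score of program \(d\) for individual \(i\), the blackbox rule assigns \(d^{\ast}_i = \arg\max_{d} \hat{\Gamma}_{d,i}\); the fairness-aware variant applies the same rule to the MQ-adjusted scores.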

# Create functions for evaluation of assignments
# Policy value: mean score of the program each individual is assigned to
compute_policyvalue <- function(Dstar, scores){
  outcomes <- mapply(function(idx, row) scores[row, idx], Dstar, seq_along(Dstar))
  return(mean(outcomes))
}
# Cramer's V between assignment and sensitive attribute
compute_cramerv <- function(Dstar, sens_comb){
  cramerV(table(Dstar, sens_comb))
}
# p-value of the chi-squared test of independence
compute_chi2 <- function(Dstar, sens_comb){
  round(chisq.test(table(Dstar, sens_comb))$p.value, 8)
}
# Log Bayes factor of the contingency table test
compute_BF <- function(Dstar, sens_comb){
  round(extractBF(contingencyTableBF(table(
    Dstar, sens_comb), sampleType = "indepMulti", fixedMargin = "cols"), onlybf=TRUE, logbf=TRUE), 8)
}
# Program frequencies (share assigned to each program)
compute_prog_freq <- function(Dstar, levels=unique_D){
  prop.table(table(factor(Dstar, levels = levels)))
}
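
As a quick, purely illustrative sanity check (not part of the original analysis), the helpers can be exercised on a uniformly random assignment, which should yield a Cramer's V close to zero and a policy value close to the average of the score means:

# Hypothetical example: evaluate a uniformly random assignment
D_random <- sample(seq_len(nunique_D), nrow(training_fair_df), replace = TRUE)
compute_policyvalue(D_random, training_fair_df[VAR_POLSCORE_NAME])
compute_cramerv(D_random, training_fair_df$sens_comb)
compute_prog_freq(D_random - 1)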


# Observed, blackbox and all-in-one assignments for training and evaluation sample
results_list <- list()
datasets <- list(training = training_fair_df, evaluation = evaluation_fair_df)
rows <- c("observed", "blackbox", "blackboxfair", "allin1")
cols <- c("Interpr.", "Policy value", "CramerV", "chi2 pval", "log(BF)", unique_D_ch)
for (set_name in names(datasets)) {
  df <- datasets[[set_name]]
  res <- matrix(NA, nrow = length(rows), ncol = length(cols),
                dimnames = list(rows, cols))
  # Observed assignment
  D_obs <- df[[VAR_D_NAME]]
  res["observed", ] <- c(
    FALSE,
    mean(df[[OUTCOME]]),
    compute_cramerv(D_obs, df$sens_comb),
    compute_chi2(D_obs, df$sens_comb),
    compute_BF(D_obs, df$sens_comb),
    compute_prog_freq(D_obs)
  )
  # Blackbox assignment
  D_blackbox <- apply(df[VAR_POLSCORE_NAME], 1, which.max)
  res["blackbox", ] <- c(
    FALSE,
    compute_policyvalue(D_blackbox, df[VAR_POLSCORE_NAME]),
    compute_cramerv(D_blackbox, df$sens_comb),
    compute_chi2(D_blackbox, df$sens_comb),
    compute_BF(D_blackbox, df$sens_comb),
    compute_prog_freq(D_blackbox - 1)
  )
  # Fair blackbox assignment
  D_blackboxfair <- apply(df[paste0(VAR_POLSCORE_NAME,"_mq")], 1, which.max)
  res["blackboxfair", ] <- c(
    FALSE,
    compute_policyvalue(D_blackboxfair, df[VAR_POLSCORE_NAME]),
    compute_cramerv(D_blackboxfair, df$sens_comb),
    compute_chi2(D_blackboxfair, df$sens_comb),
    compute_BF(D_blackboxfair, df$sens_comb),
    compute_prog_freq(D_blackboxfair - 1)
  )
  # All-in-one benchmark: everyone is assigned to the program with the
  # highest average score. By construction Cramer's V is 0 and the chi2
  # p-value is 1; log(0) = -Inf is used as the log Bayes factor of a
  # constant assignment.
  best_program <- which.max(colMeans(df[VAR_POLSCORE_NAME]))
  D_allin1 <- rep(best_program, nrow(df))
  res["allin1", ] <- c(
    TRUE,
    compute_policyvalue(D_allin1, df[VAR_POLSCORE_NAME]),
    0,
    1,
    log(0),
    compute_prog_freq(D_allin1 - 1)
  )
  
  results_list[[set_name]] <- as.data.frame(res)
}

# Print results
knitr::kable(results_list[['training']], digits=3, caption="Training sample")
Training sample
|              | Interpr. | Policy value | CramerV | chi2 pval | log(BF) |     0 |     1 |     2 |     3 |     4 |     5 |
|:-------------|---------:|-------------:|--------:|----------:|--------:|------:|------:|------:|------:|------:|------:|
| observed     |        0 |       14.486 |   0.075 |         0 | 120.510 | 0.746 | 0.196 | 0.013 | 0.014 | 0.021 | 0.010 |
| blackbox     |        0 |       18.179 |   0.096 |         0 | 270.415 | 0.000 | 0.000 | 0.366 | 0.632 | 0.002 | 0.000 |
| blackboxfair |        0 |       18.160 |   0.031 |         0 | -10.661 | 0.000 | 0.000 | 0.369 | 0.626 | 0.003 | 0.002 |
| allin1       |        1 |       17.882 |   0.000 |         1 |    -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
knitr::kable(results_list[['evaluation']], digits=3, caption="Evaluation sample")
Evaluation sample
|              | Interpr. | Policy value | CramerV | chi2 pval | log(BF) |     0 |     1 |     2 |     3 |     4 |     5 |
|:-------------|---------:|-------------:|--------:|----------:|--------:|------:|------:|------:|------:|------:|------:|
| observed     |        0 |       14.528 |   0.065 |     0.000 |  18.118 | 0.745 | 0.197 | 0.013 | 0.014 | 0.021 | 0.009 |
| blackbox     |        0 |       18.174 |   0.097 |     0.000 | 116.579 | 0.000 | 0.000 | 0.369 | 0.629 | 0.002 | 0.000 |
| blackboxfair |        0 |       18.152 |   0.028 |     0.001 | -27.173 | 0.000 | 0.000 | 0.375 | 0.620 | 0.004 | 0.001 |
| allin1       |        1 |       17.873 |   0.000 |     1.000 |    -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |

4 Policy Trees

Next, we assess policy tree assignments for different adjustment scenarios:

  - No adjustment (\(S\) included in the decision-relevant variables)
  - No adjustment (\(S\) excluded from the decision-relevant variables)
  - Adjustment of the decision-relevant variables
  - Adjustment of the scores
  - Adjustment of both the decision-relevant variables and the scores

Note that policies involving adjustments of the decision-relevant variables are no longer interpretable when standard policy trees are used.

# Determine tree parameters besides depth:
# split.step:  To avoid lengthy computations, we determine a maximum of 100
# evaluation points for continuous variables. Hence, we set split.step = n/100
splitstep <- round(nrow(training_df)/100, 0)
# We set the minimum node size to 0.1*(n/depth):
minnodesize <- round(0.1*(nrow(training_df)/PT_DEPTH), 0)
# Determine tree type
if(PT_DEPTH == PT_SEARCH_DEPTH){
  tree_FUN <- function(
    X, Gamma, depth = PT_DEPTH, split.step = splitstep, 
    min.node.size = minnodesize, verbose = TRUE){
    policy_tree(
      X, Gamma, depth = depth, split.step = split.step,
      min.node.size = min.node.size, verbose=verbose)
  }
}else{
  tree_FUN <- function(
    X, Gamma, depth = PT_DEPTH, search.depth = PT_SEARCH_DEPTH,
    split.step = splitstep, min.node.size = minnodesize, verbose = TRUE){
    hybrid_policy_tree(
      X, Gamma, depth = depth, search.depth = search.depth,
      split.step = split.step, min.node.size = min.node.size, verbose=verbose)
  }
}
# Run and evaluate policy trees for different adjustment scenarios
opt_tree_list <- list()
data_list <- list(
  pt_unadj_inclS = list(
    exp = TRUE, As = colnames(cbind(As_org, sens_org)), scores = colnames(scores_org)),
  pt_unadj_exclS = list(
    exp = TRUE, As = colnames(As_org), scores = colnames(scores_org)),
  pt_adjust_A = list(
    exp = FALSE, As = colnames(As_mq), scores = colnames(scores_org)),
  pt_adjust_score = list(
    exp = TRUE, As = colnames(As_org), scores = colnames(scores_mq)),
  pt_adjust_A_score = list(
    exp = FALSE, As = colnames(As_mq), scores = colnames(scores_mq)),
  pt_adjust_Acdf = list(
    exp = FALSE, As = colnames(As_cdf), scores = colnames(scores_org)),
  pt_adjust_Acdf_score = list(
    exp = FALSE, As = colnames(As_cdf), scores = colnames(scores_mq)))
for(set_name in names(data_list)) {
  opt_tree <- tree_FUN(
    training_fair_df[data_list[[set_name]]$As],
    training_fair_df[data_list[[set_name]]$scores])
  opt_tree_list[[set_name]] <- opt_tree
  Dstar_opt <- predict(opt_tree, training_fair_df[data_list[[set_name]]$As])
  Dstar_opt_out <- predict(opt_tree, evaluation_fair_df[data_list[[set_name]]$As])
  results_list$training[set_name, ] <- c(
    data_list[[set_name]]$exp,
    compute_policyvalue(Dstar_opt, scores_org),
    compute_cramerv(Dstar_opt, sens_comb_org$sens_comb),
    compute_chi2(Dstar_opt, sens_comb_org$sens_comb),
    compute_BF(Dstar_opt, sens_comb_org$sens_comb),
    compute_prog_freq(Dstar_opt - 1))
  results_list$evaluation[set_name, ] <- c(
    data_list[[set_name]]$exp,
    compute_policyvalue(Dstar_opt_out, scores_org_out),
    compute_cramerv(Dstar_opt_out, sens_comb_org_out$sens_comb),
    compute_chi2(Dstar_opt_out, sens_comb_org_out$sens_comb),
    compute_BF(Dstar_opt_out, sens_comb_org_out$sens_comb),
    compute_prog_freq(Dstar_opt_out - 1))
}
# Print results
knitr::kable(results_list[['training']], digits=3, caption="Training sample")
Training sample
|                      | Interpr. | Policy value | CramerV | chi2 pval |  log(BF) |     0 |     1 |     2 |     3 |     4 |     5 |
|:---------------------|---------:|-------------:|--------:|----------:|---------:|------:|------:|------:|------:|------:|------:|
| observed             |        0 |       14.486 |   0.075 |         0 |  120.510 | 0.746 | 0.196 | 0.013 | 0.014 | 0.021 | 0.010 |
| blackbox             |        0 |       18.179 |   0.096 |         0 |  270.415 | 0.000 | 0.000 | 0.366 | 0.632 | 0.002 | 0.000 |
| blackboxfair         |        0 |       18.160 |   0.031 |         0 |  -10.661 | 0.000 | 0.000 | 0.369 | 0.626 | 0.003 | 0.002 |
| allin1               |        1 |       17.882 |   0.000 |         1 |     -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| pt_unadj_inclS       |        1 |       17.923 |   0.451 |         0 | 2305.498 | 0.000 | 0.000 | 0.199 | 0.801 | 0.000 | 0.000 |
| pt_unadj_exclS       |        1 |       17.896 |   0.251 |         0 |  725.833 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
| pt_adjust_A          |        0 |       17.897 |   0.083 |         0 |   67.227 | 0.000 | 0.000 | 0.130 | 0.870 | 0.000 | 0.000 |
| pt_adjust_score      |        1 |       17.893 |   0.248 |         0 |  730.254 | 0.000 | 0.000 | 0.149 | 0.851 | 0.000 | 0.000 |
| pt_adjust_A_score    |        0 |       17.894 |   0.088 |         0 |   78.424 | 0.000 | 0.000 | 0.187 | 0.813 | 0.000 | 0.000 |
| pt_adjust_Acdf       |        0 |       17.898 |   0.080 |         0 |   62.356 | 0.000 | 0.000 | 0.137 | 0.863 | 0.000 | 0.000 |
| pt_adjust_Acdf_score |        0 |       17.894 |   0.080 |         0 |   63.007 | 0.000 | 0.000 | 0.196 | 0.804 | 0.000 | 0.000 |
knitr::kable(results_list[['evaluation']], digits=3, caption="Evaluation sample")
Evaluation sample
|                      | Interpr. | Policy value | CramerV | chi2 pval |  log(BF) |     0 |     1 |     2 |     3 |     4 |     5 |
|:---------------------|---------:|-------------:|--------:|----------:|---------:|------:|------:|------:|------:|------:|------:|
| observed             |        0 |       14.528 |   0.065 |     0.000 |   18.118 | 0.745 | 0.197 | 0.013 | 0.014 | 0.021 | 0.009 |
| blackbox             |        0 |       18.174 |   0.097 |     0.000 |  116.579 | 0.000 | 0.000 | 0.369 | 0.629 | 0.002 | 0.000 |
| blackboxfair         |        0 |       18.152 |   0.028 |     0.001 |  -27.173 | 0.000 | 0.000 | 0.375 | 0.620 | 0.004 | 0.001 |
| allin1               |        1 |       17.873 |   0.000 |     1.000 |     -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| pt_unadj_inclS       |        1 |       17.909 |   0.459 |     0.000 | 1198.717 | 0.000 | 0.000 | 0.201 | 0.799 | 0.000 | 0.000 |
| pt_unadj_exclS       |        1 |       17.882 |   0.266 |     0.000 |  394.799 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
| pt_adjust_A          |        0 |       17.883 |   0.089 |     0.000 |   35.379 | 0.000 | 0.000 | 0.124 | 0.876 | 0.000 | 0.000 |
| pt_adjust_score      |        1 |       17.880 |   0.248 |     0.000 |  359.030 | 0.000 | 0.000 | 0.145 | 0.855 | 0.000 | 0.000 |
| pt_adjust_A_score    |        0 |       17.883 |   0.090 |     0.000 |   36.355 | 0.000 | 0.000 | 0.181 | 0.819 | 0.000 | 0.000 |
| pt_adjust_Acdf       |        0 |       17.884 |   0.088 |     0.000 |   34.569 | 0.000 | 0.000 | 0.131 | 0.869 | 0.000 | 0.000 |
| pt_adjust_Acdf_score |        0 |       17.881 |   0.084 |     0.000 |   30.038 | 0.000 | 0.000 | 0.194 | 0.806 | 0.000 | 0.000 |
# Plot the trees
lab <- mappings[!is.na(mappings[VAR_D_NAME]), 'X']
plot(opt_tree_list[['pt_unadj_inclS']], leaf.labels=lab)
plot(opt_tree_list[['pt_unadj_exclS']], leaf.labels=lab)
plot(opt_tree_list[['pt_adjust_A']], leaf.labels=lab)
plot(opt_tree_list[['pt_adjust_score']], leaf.labels=lab)
plot(opt_tree_list[['pt_adjust_A_score']], leaf.labels=lab)

5 Probabilistic Split Trees

To regain the interpretability of policy trees fitted on adjusted decision-relevant variables, we can fit probabilistic split trees, which yield one tree per sensitive group. These trees may contain probabilistic splits, meaning that individuals at the splitting threshold proceed to either child node with a certain probability. Such probabilistic splits are necessary to break the dependence of the resulting assignments on the sensitive attributes.
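A minimal sketch of the mechanism behind a single probabilistic split (a hypothetical helper for illustration, not the prob_split_tree() internals):

# Hypothetical illustration of one probabilistic split: units strictly below
# the threshold go left; units exactly at the threshold go left with
# probability p_left, otherwise right.
prob_split <- function(x, threshold, p_left) {
  go_left <- x < threshold | (x == threshold & runif(length(x)) < p_left)
  ifelse(go_left, "left", "right")
}
# e.g. prob_split(c(30, 35, 35, 40), threshold = 35, p_left = 0.4)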

# Fit probabilistic split tree
data_list2 <- list(
  pst_adjust_A = list(exp = TRUE, adjust_scores = FALSE),
  pst_adjust_A_score = list(exp = TRUE, adjust_scores = TRUE))
for(set_name in names(data_list2)) {
  opt_tree <- prob_split_tree(
    As_org,
    scores_org,
    sens_org,
    adjust_scores=data_list2[[set_name]]$adjust_scores,
    seed=SEED,
    ties.method=TIES_METHOD,
    depth=PT_DEPTH,
    search.depth=PT_SEARCH_DEPTH,
    split.step=splitstep,
    min.node.size=minnodesize)
  opt_tree_list[[set_name]] <- opt_tree
  Dstar_opt <- predict(opt_tree, As_org, sens_org, seed=SEED)
  Dstar_opt_out <- predict(opt_tree, As_org_out, sens_org_out, seed=SEED)
  results_list$training[set_name, ] <- c(
    data_list2[[set_name]]$exp,
    compute_policyvalue(Dstar_opt, scores_org),
    compute_cramerv(Dstar_opt, sens_comb_org$sens_comb),
    compute_chi2(Dstar_opt, sens_comb_org$sens_comb),
    compute_BF(Dstar_opt, sens_comb_org$sens_comb),
    compute_prog_freq(Dstar_opt - 1))
  results_list$evaluation[set_name, ] <- c(
    data_list2[[set_name]]$exp,
    compute_policyvalue(Dstar_opt_out, scores_org_out),
    compute_cramerv(Dstar_opt_out, sens_comb_org_out$sens_comb),
    compute_chi2(Dstar_opt_out, sens_comb_org_out$sens_comb),
    compute_BF(Dstar_opt_out, sens_comb_org_out$sens_comb),
    compute_prog_freq(Dstar_opt_out - 1))
}
# Print results
knitr::kable(results_list[['training']], digits=3, caption="Training sample")
Training sample
|                      | Interpr. | Policy value | CramerV | chi2 pval |  log(BF) |     0 |     1 |     2 |     3 |     4 |     5 |
|:---------------------|---------:|-------------:|--------:|----------:|---------:|------:|------:|------:|------:|------:|------:|
| observed             |        0 |       14.486 |   0.075 |         0 |  120.510 | 0.746 | 0.196 | 0.013 | 0.014 | 0.021 | 0.010 |
| blackbox             |        0 |       18.179 |   0.096 |         0 |  270.415 | 0.000 | 0.000 | 0.366 | 0.632 | 0.002 | 0.000 |
| blackboxfair         |        0 |       18.160 |   0.031 |         0 |  -10.661 | 0.000 | 0.000 | 0.369 | 0.626 | 0.003 | 0.002 |
| allin1               |        1 |       17.882 |   0.000 |         1 |     -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| pt_unadj_inclS       |        1 |       17.923 |   0.451 |         0 | 2305.498 | 0.000 | 0.000 | 0.199 | 0.801 | 0.000 | 0.000 |
| pt_unadj_exclS       |        1 |       17.896 |   0.251 |         0 |  725.833 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
| pt_adjust_A          |        0 |       17.897 |   0.083 |         0 |   67.227 | 0.000 | 0.000 | 0.130 | 0.870 | 0.000 | 0.000 |
| pt_adjust_score      |        1 |       17.893 |   0.248 |         0 |  730.254 | 0.000 | 0.000 | 0.149 | 0.851 | 0.000 | 0.000 |
| pt_adjust_A_score    |        0 |       17.894 |   0.088 |         0 |   78.424 | 0.000 | 0.000 | 0.187 | 0.813 | 0.000 | 0.000 |
| pt_adjust_Acdf       |        0 |       17.898 |   0.080 |         0 |   62.356 | 0.000 | 0.000 | 0.137 | 0.863 | 0.000 | 0.000 |
| pt_adjust_Acdf_score |        0 |       17.894 |   0.080 |         0 |   63.007 | 0.000 | 0.000 | 0.196 | 0.804 | 0.000 | 0.000 |
| pst_adjust_A         |        1 |       17.896 |   0.081 |         0 |   63.422 | 0.000 | 0.000 | 0.136 | 0.864 | 0.000 | 0.000 |
| pst_adjust_A_score   |        1 |       17.893 |   0.079 |         0 |   61.521 | 0.000 | 0.000 | 0.196 | 0.804 | 0.000 | 0.000 |
knitr::kable(results_list[['evaluation']], digits=3, caption="Evaluation sample")
Evaluation sample
|                      | Interpr. | Policy value | CramerV | chi2 pval |  log(BF) |     0 |     1 |     2 |     3 |     4 |     5 |
|:---------------------|---------:|-------------:|--------:|----------:|---------:|------:|------:|------:|------:|------:|------:|
| observed             |        0 |       14.528 |   0.065 |     0.000 |   18.118 | 0.745 | 0.197 | 0.013 | 0.014 | 0.021 | 0.009 |
| blackbox             |        0 |       18.174 |   0.097 |     0.000 |  116.579 | 0.000 | 0.000 | 0.369 | 0.629 | 0.002 | 0.000 |
| blackboxfair         |        0 |       18.152 |   0.028 |     0.001 |  -27.173 | 0.000 | 0.000 | 0.375 | 0.620 | 0.004 | 0.001 |
| allin1               |        1 |       17.873 |   0.000 |     1.000 |     -Inf | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 | 0.000 |
| pt_unadj_inclS       |        1 |       17.909 |   0.459 |     0.000 | 1198.717 | 0.000 | 0.000 | 0.201 | 0.799 | 0.000 | 0.000 |
| pt_unadj_exclS       |        1 |       17.882 |   0.266 |     0.000 |  394.799 | 0.000 | 0.000 | 0.115 | 0.885 | 0.000 | 0.000 |
| pt_adjust_A          |        0 |       17.883 |   0.089 |     0.000 |   35.379 | 0.000 | 0.000 | 0.124 | 0.876 | 0.000 | 0.000 |
| pt_adjust_score      |        1 |       17.880 |   0.248 |     0.000 |  359.030 | 0.000 | 0.000 | 0.145 | 0.855 | 0.000 | 0.000 |
| pt_adjust_A_score    |        0 |       17.883 |   0.090 |     0.000 |   36.355 | 0.000 | 0.000 | 0.181 | 0.819 | 0.000 | 0.000 |
| pt_adjust_Acdf       |        0 |       17.884 |   0.088 |     0.000 |   34.569 | 0.000 | 0.000 | 0.131 | 0.869 | 0.000 | 0.000 |
| pt_adjust_Acdf_score |        0 |       17.881 |   0.084 |     0.000 |   30.038 | 0.000 | 0.000 | 0.194 | 0.806 | 0.000 | 0.000 |
| pst_adjust_A         |        1 |       17.884 |   0.087 |     0.000 |   33.111 | 0.000 | 0.000 | 0.130 | 0.870 | 0.000 | 0.000 |
| pst_adjust_A_score   |        1 |       17.882 |   0.077 |     0.000 |   23.446 | 0.000 | 0.000 | 0.192 | 0.808 | 0.000 | 0.000 |
# Plot the trees
plot(opt_tree_list[['pst_adjust_A']], sens_names=VAR_S_NAME_ORD, leaf.labels=lab)
plot(opt_tree_list[['pst_adjust_A_score']], sens_names=VAR_S_NAME_ORD, leaf.labels=lab)
# Combine into one tree by manually adjusting the DOT string
dot_string_total <- ""
j <- 0
for(i in names(opt_tree_list[['pst_adjust_A_score']])){
  j <- j + 1
  dot_string2 <- plot(opt_tree_list[['pst_adjust_A_score']][[i]], leaf.labels=lab)[["x"]][["diagram"]]
  dot_string2 <- gsub("\n(\\d)", paste0("\n",j,"\\1"), dot_string2)
  dot_string2 <- gsub("-> (\\d)", paste0("-> ",j,"\\1"), dot_string2)
  dot_string2 <- sub("digraph nodes { \n node [shape=box] ;\n", "", dot_string2, fixed = TRUE)
  dot_string2 <- sub("\n}", "", dot_string2, fixed = TRUE)
  dot_string2 <- gsub(
    "[labeldistance=2.5, labelangle=45, headlabel=\"True\"]", "", dot_string2, fixed = TRUE)
  dot_string2 <- gsub(
    "[labeldistance=2.5, labelangle=-45, headlabel=\"False\"]", "", dot_string2, fixed = TRUE)
  dot_string_total <- paste0(dot_string_total, dot_string2)
}
dot_string <- paste0("
      digraph combined {
        rankdir=LR;
        node [shape=box];
        top [label=\"female = 1\", shape=ellipse, style=filled, fillcolor=lightblue];
        top2 [label=\"swiss = 1\", shape=ellipse, style=filled, fillcolor=lightblue];
        top3 [label=\"swiss = 1\", shape=ellipse, style=filled, fillcolor=lightblue];
        ", dot_string_total, "
        top -> top2 [labeldistance=3, labelangle=-30, headlabel='True'];
        top -> top3 [labeldistance=3, labelangle=30, headlabel='False'];
        top2 -> ", 10, ";
        top2 -> ", 20, ";
        top3 -> ", 30, ";
        top3 -> ", 40, ";
      }
    ")
dot_string <- gsub("qual_degree", "degree", dot_string, fixed = TRUE)
dot_string <- gsub("past_income", "past earnings", dot_string, fixed = TRUE)
DiagrammeR::grViz(dot_string)

6 Partial Adjustments

One may also adjust the variables only partially to further analyze the trade-off between policy value and fairness. This is achieved by taking linear combinations of the adjusted and original variables for different weights between 0 and 1. The steps are as follows:

  1. Define a grid of weights from 0 to 1 in steps of 0.1
  2. Compute the weighted combinations of the scores and decision-relevant variables (formalized below)
  3. Fit the policy trees
  4. Compute the policy value and fairness measures and plot them
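
Writing \(A^{\mathrm{MQ}}\) and \(\Gamma^{\mathrm{MQ}}\) for the MQ-adjusted decision-relevant variables and scores, step 2 forms, for each weight \(w\) in the grid, the convex combinations \(A^{(w)} = w A^{\mathrm{MQ}} + (1-w) A\) and \(\Gamma^{(w)} = w \Gamma^{\mathrm{MQ}} + (1-w) \Gamma\) (the combined decision-relevant variables are rounded to integers, as in the code below), so that \(w = 0\) recovers the original variables and \(w = 1\) the fully adjusted ones.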
# Define weights
weights <- seq(0,1,0.1)
inv_weights <- 1 - weights
As_weight <- mapply(
  function(w, iw) round(As_mq * w + As_org * iw, 0), weights, inv_weights, SIMPLIFY = FALSE)
As_weight_out <- mapply(
  function(w, iw) round(As_mq_out * w + As_org_out * iw, 0), weights, inv_weights, SIMPLIFY = FALSE)
scores_weight <- mapply(
  function(w, iw) scores_mq * w + scores_org * iw, weights, inv_weights, SIMPLIFY = FALSE)
As_org_list <- replicate(length(weights), As_org, simplify = FALSE)
As_org_out_list <- replicate(length(weights), As_org_out, simplify = FALSE)
scores_org_list <- replicate(length(weights), scores_org, simplify = FALSE)
# Setup scenarios
scenarios <- list(
  clean_As = list(As = As_weight, scores = scores_org_list, As_out = As_weight_out),
  clean_Scores = list(As = As_org_list, scores = scores_weight, As_out = As_org_out_list),
  clean_Scores_As = list(As = As_weight, scores = scores_weight, As_out = As_weight_out)
)
# Compute policy trees and policy value/fairness measures
rows <- list()
opt_tree_weight <- list()
for(scen in names(scenarios)){
  As_list <- scenarios[[scen]]$As
  Scores_list <- scenarios[[scen]]$scores
  As_out_list <- scenarios[[scen]]$As_out
  opt_tree_weight[[scen]] <- list()
  for(i in seq_along(weights)){
    opt_tree_weight[[scen]][[i]] <- tree_FUN(As_list[[i]], Scores_list[[i]])
    Dstar <- predict(opt_tree_weight[[scen]][[i]], As_out_list[[i]])
    rows[[length(rows) + 1]] <- data.frame(
      Weight = weights[i], 
      Metric='Policy value', 
      Scenario=scen, 
      Value=compute_policyvalue(Dstar, scores_org_out))
    rows[[(length(rows) + 1)]] <- data.frame(
      Weight = weights[i], 
      Metric='CramerV', 
      Scenario=scen, 
      Value=compute_cramerv(Dstar, sens_comb_org_out$sens_comb)[[1]])
  }
}
results_partial <- do.call(rbind, rows)
range_policyvalue <- results_partial %>% filter(Metric == "Policy value") %>%
  summarise(min_val = floor(min(Value) * 10) / 10, max_val = ceiling(
    max(Value) * 10) / 10)
range_CramerV <- results_partial %>% filter(Metric == "CramerV") %>%
  summarise(min_val = floor(min(Value) * 10) / 10, max_val = ceiling(
    max(Value) * 10) / 10)
# Plot
p1 <- ggplot(results_partial[results_partial$Metric == "Policy value", ], aes(
  x = Weight, y = Value, color = Scenario, linetype= Scenario)) +
  geom_line(linewidth=0.7) +
  labs(
    title = "Policy Value", x = "Weight of adjusted variable",
    color = "Scenario:", linetype= "Scenario:") +
  ylim(range_policyvalue$min_val, range_policyvalue$max_val) +
  theme_minimal() +
  scale_color_discrete(labels = c(
    expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
    expression(paste("Adjust ", A, " and ", Gamma["d"])))) +
  scale_linetype_discrete(labels = c(
    expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
    expression(paste("Adjust ", A, " and ", Gamma["d"]))))
p2 <- ggplot(results_partial[results_partial$Metric == "CramerV", ], aes(
  x = Weight, y = Value, color = Scenario, linetype= Scenario)) +
  geom_line(linewidth=0.7) +
  labs(
    title = "Fairness (Cramer's V)", x = "Weight of adjusted variable",
    color = "Scenario:", linetype= "Scenario:") +
  ylim(0, range_CramerV$max_val) +
  theme_minimal() +
  scale_color_discrete(labels = c(
    expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
    expression(paste("Adjust ", A, " and ", Gamma["d"])))) +
  scale_linetype_discrete(labels = c(
    expression(paste("Adjust ", A)), expression(paste("Adjust ", Gamma["d"])),
    expression(paste("Adjust ", A, " and ", Gamma["d"]))))
plot_combined <- (p1 + p2) + plot_layout(guides = "collect")
plot_combined

7 Winners and Losers

Finally, we can assess the winners and losers of the probabilistic split tree assignment compared to the unadjusted policy tree assignment. The “winner” and “loser” groups are determined by k-means clustering, with the number of clusters chosen by the silhouette score, subject to a minimum cluster size of 1% of the observations.
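For reference, the silhouette value of an observation \(i\) compares its mean distance to the other members of its own cluster, \(a(i)\), with the smallest mean distance to the members of any other cluster, \(b(i)\), via \(s(i) = (b(i) - a(i)) / \max\{a(i), b(i)\} \in [-1, 1]\); the number of clusters is chosen to maximize the average \(s(i)\) among the candidate values that respect the minimum cluster size.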

# Prepare data
individual_policyvalue <- function(Dstar, scores) {
  scores[cbind(seq_along(Dstar), Dstar)]
}
data_df <- cbind(
  evaluation_df[, columns$X_ord[columns$X_ord > 0]],
  fastDummies::dummy_cols(
    data.frame(lapply(evaluation_df[, columns$X_unord[columns$X_unord > 0]], as.character)),
    remove_selected_columns = TRUE
  ),
  policyvalue_adjusted = individual_policyvalue(
    predict(tree = opt_tree_list[["pst_adjust_A_score"]], A = As_org_out, sens = sens_org_out),
    scores_org_out
  ),
  policyvalue_unadjusted = individual_policyvalue(
    predict(opt_tree_list[['pt_unadj_exclS']], As_org_out),
    scores_org_out
  )
)
# Compute the difference between actual and reference policy value.
data_df$policyvalue_diff <- data_df$policyvalue_adjusted - data_df$policyvalue_unadjusted
policyvalue_diff_matrix <- matrix(data_df$policyvalue_diff, ncol = 1)
# Determine optimal number of clusters using Silhouette score
dist_matrix <- stats::dist(policyvalue_diff_matrix)
min_cluster_size <- ceiling(nrow(data_df) / 100)
sil_width <- sapply(2:8, function(k){
  km <- kmeans(policyvalue_diff_matrix, centers = k, nstart = 10)
  ss <- silhouette(km$cluster, dist_matrix)
  if(min(km$size) > min_cluster_size){
    mean(ss[, 3])
  }else{
    0
  }
})
n_clusters <- which.max(sil_width) + 1
# Get clusters ordered by cluster mean
km <- kmeans(policyvalue_diff_matrix, centers = n_clusters, nstart = 10)
ordered_clusters <- order(km$centers)
cluster_map <- setNames(seq_along(ordered_clusters), ordered_clusters)
data_df$cluster_ids <- cluster_map[as.character(km$cluster)]
# Get variables means
df_grouped_means <- data_df %>%
  group_by(cluster_ids) %>%
  summarise(n = n(), across(where(is.numeric),\(x) mean(x, na.rm = TRUE)))  %>%
  pivot_longer(cols = -cluster_ids, names_to = "variable", values_to = "mean") %>%
  pivot_wider(names_from = cluster_ids, values_from = mean) %>%
  arrange(factor(variable, levels = c("policyvalue_diff", setdiff(variable, "policyvalue_diff")))) %>% 
  mutate(across(where(is.numeric), \(x) round(x, 2)))
# Generate meaningful cluster labels
generate_cluster_labels <- function(means) {
  labels <- character(length(means))
  gains <- means > 0
  losses <- means < 0
  zeros <- means == 0
  intensity_labels <- c("Strong", "Moderate", "Mild", "Slight")
  # Gains: rank 1 = largest gain gets the strongest label
  if(sum(gains) > 0){
    ranks <- rank(-means[gains], ties.method = "first")
    intensity <- if(sum(gains) > 1) paste(intensity_labels[seq_along(ranks)], "Gain") else "Gain"
    labels[gains] <- intensity[ranks]
  }
  # Losses: rank 1 = largest loss gets the strongest label
  if(sum(losses) > 0){
    ranks <- rank(means[losses], ties.method = "first")
    intensity <- if(sum(losses) > 1) paste(intensity_labels[seq_along(ranks)], "Loss") else "Loss"
    labels[losses] <- intensity[ranks]
  }
  # No change
  labels[zeros] <- "No Change"
  return(labels)
}
colnames(df_grouped_means) <- c("Variable", generate_cluster_labels(
  round(as.numeric(subset(df_grouped_means, variable == 'policyvalue_diff')[, -1]),2)))
# Print
knitr::kable(
  df_grouped_means,
  caption="Covariate means of k-means clusters")
Covariate means of k-means clusters
| Variable               | Strong Loss | Moderate Loss | No Change | Moderate Gain | Strong Gain |
|:-----------------------|------------:|--------------:|----------:|--------------:|------------:|
| policyvalue_diff       |       -2.16 |         -1.00 |      0.00 |          0.93 |        1.97 |
| n                      |      185.00 |        392.00 |  10688.00 |        399.00 |      196.00 |
| female                 |        0.54 |          0.58 |      0.43 |          0.52 |        0.58 |
| swiss                  |        0.79 |          0.59 |      0.64 |          0.65 |        0.73 |
| age                    |       42.41 |         42.32 |     36.00 |         42.18 |       42.57 |
| canton_moth_tongue     |        0.03 |          0.09 |      0.11 |          0.09 |        0.07 |
| cw_age                 |       42.74 |         43.20 |     44.08 |         46.03 |       47.41 |
| cw_cooperative         |        0.59 |          0.51 |      0.49 |          0.39 |        0.32 |
| cw_educ_above_voc      |        0.33 |          0.44 |      0.46 |          0.50 |        0.58 |
| cw_educ_tertiary       |        0.38 |          0.19 |      0.20 |          0.13 |        0.11 |
| cw_female              |        0.54 |          0.49 |      0.44 |          0.43 |        0.35 |
| cw_missing             |        0.05 |          0.05 |      0.05 |          0.04 |        0.08 |
| cw_own_ue              |        0.70 |          0.59 |      0.62 |          0.64 |        0.71 |
| cw_tenure              |        4.61 |          5.33 |      5.53 |          5.97 |        6.13 |
| cw_voc_degree          |        0.24 |          0.28 |      0.26 |          0.26 |        0.14 |
| emp_share_last_2yrs    |        0.72 |          0.76 |      0.81 |          0.78 |        0.81 |
| emp_spells_5yrs        |        1.23 |          1.20 |      1.19 |          1.11 |        0.93 |
| employability          |        1.85 |          1.84 |      1.93 |          1.75 |        1.84 |
| foreigner_b            |        0.06 |          0.16 |      0.13 |          0.12 |        0.07 |
| foreigner_c            |        0.14 |          0.25 |      0.23 |          0.23 |        0.19 |
| gdp_pc                 |        0.59 |          0.55 |      0.52 |          0.48 |        0.48 |
| married                |        0.50 |          0.61 |      0.46 |          0.63 |        0.64 |
| other_mother_tongue    |        0.15 |          0.42 |      0.33 |          0.40 |        0.34 |
| past_income            |    25671.35 |      27755.66 |  43964.99 |      27246.47 |    28049.13 |
| ue_cw_allocation1      |        0.68 |          0.58 |      0.59 |          0.54 |        0.47 |
| ue_cw_allocation2      |        0.58 |          0.53 |      0.51 |          0.42 |        0.31 |
| ue_cw_allocation3      |        0.06 |          0.05 |      0.04 |          0.05 |        0.02 |
| ue_cw_allocation4      |        0.06 |          0.09 |      0.08 |          0.10 |        0.05 |
| ue_cw_allocation5      |        0.08 |          0.16 |      0.12 |          0.13 |        0.16 |
| ue_cw_allocation6      |        0.10 |          0.11 |      0.09 |          0.10 |        0.07 |
| ue_spells_last_2yrs    |        0.62 |          0.72 |      0.56 |          0.62 |        0.43 |
| unemp_rate             |        4.25 |          3.72 |      3.51 |          3.19 |        3.20 |
| qual_degree            |        0.62 |          0.42 |      0.58 |          0.47 |        0.43 |
| city_1                 |        0.56 |          0.65 |      0.68 |          0.75 |        0.77 |
| city_2                 |        0.14 |          0.11 |      0.13 |          0.14 |        0.14 |
| city_3                 |        0.30 |          0.24 |      0.20 |          0.12 |        0.10 |
| prev_job_sec_cat_0     |        0.66 |          0.56 |      0.61 |          0.59 |        0.61 |
| prev_job_sec_cat_1     |        0.07 |          0.12 |      0.14 |          0.13 |        0.12 |
| prev_job_sec_cat_2     |        0.24 |          0.26 |      0.17 |          0.20 |        0.20 |
| prev_job_sec_cat_3     |        0.03 |          0.06 |      0.09 |          0.08 |        0.07 |
| prev_job_0             |        0.66 |          0.50 |      0.61 |          0.45 |        0.52 |
| prev_job_1             |        0.04 |          0.04 |      0.07 |          0.03 |        0.05 |
| prev_job_2             |        0.24 |          0.44 |      0.28 |          0.50 |        0.41 |
| prev_job_3             |        0.06 |          0.02 |      0.03 |          0.02 |        0.02 |
| qual_0                 |        0.62 |          0.42 |      0.58 |          0.47 |        0.43 |
| qual_1                 |        0.09 |          0.18 |      0.16 |          0.20 |        0.32 |
| qual_2                 |        0.26 |          0.36 |      0.23 |          0.30 |        0.21 |
| qual_3                 |        0.02 |          0.03 |      0.04 |          0.03 |        0.04 |
| policyvalue_adjusted   |       15.19 |         14.39 |     18.16 |         15.61 |       16.88 |
| policyvalue_unadjusted |       17.34 |         15.39 |     18.16 |         14.68 |       14.91 |