This notebook runs the application described in Section 5.2 of the
paper. The first part replicates the results presented in the paper; the
second part provides supplementary information.
1. Replication of paper results
Getting started
First, load packages and set the seed:
library(OutcomeWeights)
library(hdm)
library(grf)
library(tidyverse)
library(viridis) # for color-blind friendly plotting
library(reshape2)
library(ggridges)
set.seed(1234)
Next, load the data. Here we use the 401(k) data of the `hdm` package.
However, you can adapt the following code chunk to load any suitable data
of your choice. Just make sure to call the outcome `Y`, the treatment `D`,
the covariates `X`, and the instrument `Z`. The rest of the notebook should
run without further modifications.
# Load the pension data; type ?pension in the console for the variable description
data(pension)
# Outcome
Y <- pension[["net_tfa"]]
# Treatment
D <- pension[["p401"]]
# Instrument
Z <- pension[["e401"]]
# Controls: design matrix without intercept, with readable column labels
X <- model.matrix(
  ~ 0 + age + db + educ + fsize + hown + inc + male + marr + pira + twoearn,
  data = pension
)
var_nm <- c(
  "Age", "Benefit pension", "Education", "Family size", "Home owner",
  "Income", "Male", "Married", "IRA", "Two earners"
)
colnames(X) <- var_nm
Get outcome model and smoother matrix
The `grf` package does not save the nuisance parameter models. Thus, we
cannot retrieve the required smoother matrices after running
`causal_forest` and `instrumental_forest` below. Therefore, we estimate the
outcome nuisance model externally and pass the nuisance parameters to
these functions later. As described in Section 5.2 of the paper, we run a
default and a tuned version:
# Outcome nuisance model, fitted outside of the causal/instrumental forests:
# first with the default settings, then with all tunable parameters tuned
rf_Y.hat_default <- regression_forest(X, Y)
rf_Y.hat_tuned <- regression_forest(X, Y, tune.parameters = "all")
# Predictions of both forests on the training sample (no new data supplied)
Y.hat_default <- predict(rf_Y.hat_default)[["predictions"]]
Y.hat_tuned <- predict(rf_Y.hat_tuned)[["predictions"]]
Then, we extract the smoother matrix using `get_forest_weights()`:
# Smoother matrices of the default and the tuned outcome forest
S_default <- get_forest_weights(rf_Y.hat_default)
S_tuned <- get_forest_weights(rf_Y.hat_tuned)
For illustration, check that random forest is an affine smoother by
summing all smoother vectors and checking whether they sum to one:
# Every row of the smoother matrix should sum to one (affine smoother).
# Matrix::rowSums() sums the rows of the weight matrix as it is stored
# (get_forest_weights() is documented to return a sparse matrix), so the
# n x n matrix no longer has to be converted to a dense one just for this check.
# all.equal() compares with a numerical tolerance and prints TRUE on success.
cat(
  "RF affine smoother?",
  all.equal(Matrix::rowSums(S_default), rep(1, length(Y)))
)
RF affine smoother? TRUE
Causal forest
Run causal forest with the externally estimated outcome nuisance:
# Causal forests that reuse the externally estimated outcome nuisance:
# default settings first, then the fully tuned version
cf_default <- causal_forest(X, Y, D, Y.hat = Y.hat_default)
cf_tuned <- causal_forest(X, Y, D, Y.hat = Y.hat_tuned, tune.parameters = "all")
Get the out-of-bag CATEs:
# CATE point estimates of both causal forests
cates_default <- predict(cf_default)[["predictions"]]
cates_tuned <- predict(cf_tuned)[["predictions"]]
Use the new `get_outcome_weights()` method, which requires passing the
externally estimated outcome smoother matrix:
# Outcome weights of the causal forests, each paired with the smoother matrix
# of the outcome forest that produced its Y.hat
omega_cf_default <- get_outcome_weights(cf_default, S = S_default)
omega_cf_tuned <- get_outcome_weights(cf_tuned, S = S_tuned)
Observe that the outcome weights recover the `grf` package output:
# Weighted outcomes vs. the forest's own predictions (default forest);
# all.equal() compares with a numerical tolerance
cat(
  "ω'Y replicates CATE point estimates (default)?",
  all.equal(as.numeric(omega_cf_default$omega %*% Y), as.numeric(cates_default))
)
ω'Y replicates CATE point estimates (default)? TRUE
# Weighted outcomes vs. the forest's own predictions (tuned forest);
# all.equal() compares with a numerical tolerance
cat(
  "\nω'Y replicates CATE point estimates (tuned)?",
  all.equal(as.numeric(omega_cf_tuned$omega %*% Y), as.numeric(cates_tuned))
)
ω'Y replicates CATE point estimates (tuned)? TRUE
Now calculate the absolute standardized mean differences and plot
them for each CATE and covariate (Figure 3a in paper):
# Covariate balance implied by the causal forest outcome weights (Figure 3a).
# One call per set of weights; X is passed both as the covariates to balance
# and as the last argument (presumably the target -- confirm in
# ?standardized_mean_differences).
cb_cate_default <- standardized_mean_differences(X, D, omega_cf_default$omega, X)
cb_cate_tuned <- standardized_mean_differences(X, D, omega_cf_tuned$omega, X)

# Take the third statistic (used here as the standardized mean difference) in
# absolute value; transpose so that the covariates end up in the columns
smd_default <- t(abs(cb_cate_default[, 3, ]))
smd_tuned <- t(abs(cb_cate_tuned[, 3, ]))

# Long format: one row per (weight vector, covariate) pair, tagged by forest
df_default_long <- melt(smd_default)
df_default_long$Group <- "smd_default"
df_tuned_long <- melt(smd_tuned)
df_tuned_long$Group <- "smd_tuned"

# Stack both versions and give the columns descriptive names
df_long <- rbind(df_default_long, df_tuned_long)
colnames(df_long) <- c("Row", "Variable", "Value", "Group")

# The covariate order is reversed so that the first covariate is drawn at the
# top after coord_flip(). Axis and legend titles are removed with NULL, the
# documented way to omit them; element_blank() is a theme element, not a label.
figure3a <- ggplot(
  df_long,
  aes(x = factor(Variable, levels = rev(unique(Variable))), y = Value, fill = Group)
) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  labs(x = NULL, y = "Absolute Standardized Mean Differences") +
  scale_fill_manual(
    values = viridis(2),
    name = NULL,
    labels = c("default", "tuned")
  ) +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  coord_flip() +
  theme(legend.position = "bottom")
figure3a
Instrumental forest
Run instrumental forest with the externally estimated outcome
nuisance:
# Instrumental forests that reuse the externally estimated outcome nuisance:
# default settings first, then the fully tuned version
ivf_default <- instrumental_forest(X, Y, D, Z, Y.hat = Y.hat_default)
ivf_tuned <- instrumental_forest(X, Y, D, Z, Y.hat = Y.hat_tuned, tune.parameters = "all")
Get the out-of-bag CLATEs:
# CLATE point estimates of both instrumental forests
clates_default <- predict(ivf_default)[["predictions"]]
clates_tuned <- predict(ivf_tuned)[["predictions"]]
Use the new `get_outcome_weights()` method, which requires passing the
externally estimated outcome smoother matrix:
# Outcome weights of the instrumental forests, each paired with the smoother
# matrix of the outcome forest that produced its Y.hat
omega_if_default <- get_outcome_weights(ivf_default, S = S_default)
omega_if_tuned <- get_outcome_weights(ivf_tuned, S = S_tuned)
Observe that the outcome weights recover the `grf` package output:
# Weighted outcomes vs. the forest's own predictions (default forest);
# all.equal() compares with a numerical tolerance
cat(
  "ω'Y replicates CLATE point estimates (default)?",
  all.equal(as.numeric(omega_if_default$omega %*% Y), as.numeric(clates_default))
)
ω'Y replicates CLATE point estimates (default)? TRUE
# Weighted outcomes vs. the forest's own predictions (tuned forest);
# all.equal() compares with a numerical tolerance
cat(
  "\nω'Y replicates CLATE point estimates (tuned)?",
  all.equal(as.numeric(omega_if_tuned$omega %*% Y), as.numeric(clates_tuned))
)
ω'Y replicates CLATE point estimates (tuned)? TRUE
Now calculate the absolute standardized mean differences and plot
them for each CLATE and covariate (Figure 3b in paper):
# Covariate balance implied by the instrumental forest outcome weights
# (Figure 3b). One call per set of weights; X is passed both as the covariates
# to balance and as the last argument (presumably the target -- confirm in
# ?standardized_mean_differences).
cb_clate_default <- standardized_mean_differences(X, D, omega_if_default$omega, X)
cb_clate_tuned <- standardized_mean_differences(X, D, omega_if_tuned$omega, X)

# Take the third statistic (used here as the standardized mean difference) in
# absolute value; transpose so that the covariates end up in the columns
smd_default <- t(abs(cb_clate_default[, 3, ]))
smd_tuned <- t(abs(cb_clate_tuned[, 3, ]))

# Long format: one row per (weight vector, covariate) pair, tagged by forest
df_default_long <- melt(smd_default)
df_default_long$Group <- "smd_default"
df_tuned_long <- melt(smd_tuned)
df_tuned_long$Group <- "smd_tuned"

# Stack both versions and give the columns descriptive names
df_long <- rbind(df_default_long, df_tuned_long)
colnames(df_long) <- c("Row", "Variable", "Value", "Group")

# The covariate order is reversed so that the first covariate is drawn at the
# top after coord_flip(). Axis and legend titles are removed with NULL, the
# documented way to omit them; element_blank() is a theme element, not a label.
figure3b <- ggplot(
  df_long,
  aes(x = factor(Variable, levels = rev(unique(Variable))), y = Value, fill = Group)
) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  labs(x = NULL, y = "Absolute Standardized Mean Differences") +
  scale_fill_manual(
    values = viridis(2),
    name = NULL,
    labels = c("default", "tuned")
  ) +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  coord_flip() +
  theme(legend.position = "bottom")
figure3b
2. Supplementary results
Plot results
Here we observe that the default effects not only show much worse
balancing, but also lead to implausibly high variance in their
estimates.
# Stack the four vectors of point estimates and label each one by
# estimator/implementation; `each` relies on all four vectors having the same length
data <- data.frame(
  value = c(cates_default, cates_tuned, clates_default, clates_tuned),
  category = rep(
    c("grf CATEs default", "grf CATEs tuned", "grf CLATEs default", "grf CLATEs tuned"),
    each = length(cates_default)
  )
)
# Horizontal boxplots of the estimates with a reference line at zero
ggplot(data, aes(x = value, y = category, fill = category)) +
  geom_boxplot(alpha = 0.7) +
  geom_vline(xintercept = 0, linetype = "solid", color = "black") +
  labs(x = "Estimate", y = "Estimator/Implementation") +
  theme_minimal() +
  theme(legend.position = "none")
# Ridge plot: one density of the point estimates per estimator/implementation
ggplot(data, aes(y = category, x = value, fill = category)) +
  geom_density_ridges(scale = 1, alpha = 0.7) +
  labs(x = "Estimate", y = "Estimator/Implementation") +
  theme_minimal() +
  theme(legend.position = "none")
Summarize weights
Finally, we summarize the weight vectors of each C(L)ATE by plotting
the following descriptives:
Minimum weight
Maximum weight
% Negative
Sum largest 10%
Sum of weights
Sum of absolute weights
The sum of weights is especially interesting in light of the paper.
It shows that tuning also produces weight sums closer to one in this
data set. In addition, negative weights are fewer and less pronounced
for the tuned version:
# Summarise the outcome weights of all four forests. These objects are used by
# the loop below but were never created in this listing. Judging from the
# indexing below, summary() returns a 3D array indexed as
# [1 = untreated / 2 = treated, weight vector, descriptive] -- confirm in the
# OutcomeWeights documentation.
summary_weights_cf_default <- summary(omega_cf_default, quiet = TRUE)
summary_weights_cf_tuned <- summary(omega_cf_tuned, quiet = TRUE)
summary_weights_if_default <- summary(omega_if_default, quiet = TRUE)
summary_weights_if_tuned <- summary(omega_if_tuned, quiet = TRUE)

# Collect the arrays under their plot labels so that every forest is handled
# by the same code instead of eight hand-copied vectors
summary_weights <- list(
  "CF default" = summary_weights_cf_default,
  "CF tuned" = summary_weights_cf_tuned,
  "IF default" = summary_weights_if_default,
  "IF tuned" = summary_weights_if_tuned
)

# Names of the descriptives (third array dimension), used as x-axis titles
stat_names <- dimnames(summary_weights_cf_default)[[3]]

# One figure per descriptive; seq_len() skips the loop if there is none
for (i in seq_len(dim(summary_weights_cf_default)[3])) {
  # Build one data frame per forest for descriptive i and stack them
  df_weights <- do.call(rbind, lapply(names(summary_weights), function(group) {
    untreated <- summary_weights[[group]][1, , i]
    treated <- summary_weights[[group]][2, , i]
    data.frame(
      Value = c(untreated, treated),
      Group = group,
      Treatment = rep(
        c("Untreated", "Treated"),
        times = c(length(untreated), length(treated))
      )
    )
  }))
  df_weights$Group <- factor(df_weights$Group)
  df_weights$Treatment <- factor(df_weights$Treatment)

  # Side-by-side boxplots of treated vs. untreated for each forest
  g <- ggplot(df_weights, aes(x = Value, y = Group, fill = Treatment)) +
    geom_boxplot(position = position_dodge(width = 0.75)) +
    labs(x = stat_names[i], y = NULL) +
    theme_minimal() +
    theme(legend.position = "bottom") # Place legend at the bottom
  print(g)
}
---
title: "Treatment Effect Estimators as Weighted Outcomes"
subtitle: "Application 401(k) - heterogeneous effects"
author: "Michael C. Knaus"
date: "11/24"
output: 
  html_notebook:
    toc: true
    toc_float: true
    code_folding: show
---

*Replication comment: Running the notebooks within the replication docker ensures that results perfectly replicate. Otherwise, results might differ depending on which package versions you use.*


This notebook runs the application described in Section 5.2 of the paper. The first part replicates the results presented in the paper; the second part provides supplementary information.

# 1. Replication of paper results

## Getting started

First, load packages and set the seed:

```{r, message = FALSE, warning=FALSE}
library(OutcomeWeights)
library(hdm)
library(grf)
library(tidyverse)
library(viridis) # for color-blind friendly plotting
library(reshape2)
library(ggridges)

set.seed(1234)
```

Next, load the data. Here we use the 401(k) data of the `hdm` package. However, you can adapt the following code chunk to load any suitable data of your choice. Just make sure to call the outcome `Y`, the treatment `D`, the covariates `X`, and the instrument `Z`. The rest of the notebook should run without further modifications.

```{r}
# Load the pension data; type ?pension in the console for the variable description
data(pension)

# Outcome
Y <- pension[["net_tfa"]]
# Treatment
D <- pension[["p401"]]
# Instrument
Z <- pension[["e401"]]

# Controls: design matrix without intercept, with readable column labels
X <- model.matrix(
  ~ 0 + age + db + educ + fsize + hown + inc + male + marr + pira + twoearn,
  data = pension
)
var_nm <- c(
  "Age", "Benefit pension", "Education", "Family size", "Home owner",
  "Income", "Male", "Married", "IRA", "Two earners"
)
colnames(X) <- var_nm
```


## Get outcome model and smoother matrix

The `grf` package does not save the nuisance parameter models. Thus, we cannot retrieve the required smoother matrices after running `causal_forest` and `instrumental_forest` below. Therefore, we estimate the outcome nuisance model externally and pass the nuisance parameters to these functions later. As described in Section 5.2 of the paper, we run a default and a tuned version:

```{r}
# Outcome nuisance model, fitted outside of the causal/instrumental forests:
# first with the default settings, then with all tunable parameters tuned
rf_Y.hat_default <- regression_forest(X, Y)
rf_Y.hat_tuned <- regression_forest(X, Y, tune.parameters = "all")
# Predictions of both forests on the training sample (no new data supplied)
Y.hat_default <- predict(rf_Y.hat_default)[["predictions"]]
Y.hat_tuned <- predict(rf_Y.hat_tuned)[["predictions"]]
```

Then, we extract the smoother matrix using `get_forest_weights()`:

```{r}
# Smoother matrices of the default and the tuned outcome forest
S_default <- get_forest_weights(rf_Y.hat_default)
S_tuned <- get_forest_weights(rf_Y.hat_tuned)
```

For illustration, check that random forest is an affine smoother by summing all smoother vectors and checking whether they sum to one:

```{r}
# Every row of the smoother matrix should sum to one (affine smoother).
# Matrix::rowSums() sums the rows of the weight matrix as it is stored
# (get_forest_weights() is documented to return a sparse matrix), so the
# n x n matrix no longer has to be converted to a dense one just for this check.
# all.equal() compares with a numerical tolerance and prints TRUE on success.
cat(
  "RF affine smoother?",
  all.equal(Matrix::rowSums(S_default), rep(1, length(Y)))
)
```


## Causal forest

Run causal forest with the externally estimated outcome nuisance:

```{r}
# Causal forests that reuse the externally estimated outcome nuisance:
# default settings first, then the fully tuned version
cf_default <- causal_forest(X, Y, D, Y.hat = Y.hat_default)
cf_tuned <- causal_forest(X, Y, D, Y.hat = Y.hat_tuned, tune.parameters = "all")
```

Get the out-of-bag CATEs:

```{r}
# CATE point estimates of both causal forests
cates_default <- predict(cf_default)[["predictions"]]
cates_tuned <- predict(cf_tuned)[["predictions"]]
```

Use the new `get_outcome_weights()` method, which requires passing the externally estimated outcome smoother matrix:

```{r}
# Outcome weights of the causal forests, each paired with the smoother matrix
# of the outcome forest that produced its Y.hat
omega_cf_default <- get_outcome_weights(cf_default, S = S_default)
omega_cf_tuned <- get_outcome_weights(cf_tuned, S = S_tuned)
```

Observe that the outcome weights recover the `grf` package output:

```{r}
# Weighted outcomes vs. the forests' own predictions, default forest first;
# all.equal() compares with a numerical tolerance
cat(
  "ω'Y replicates CATE point estimates (default)?",
  all.equal(as.numeric(omega_cf_default$omega %*% Y), as.numeric(cates_default))
)
# Same comparison for the tuned forest
cat(
  "\nω'Y replicates CATE point estimates (tuned)?",
  all.equal(as.numeric(omega_cf_tuned$omega %*% Y), as.numeric(cates_tuned))
)
```

Now calculate the absolute standardized mean differences and plot them for each CATE and covariate (Figure 3a in paper):

```{r}
# Covariate balance implied by the causal forest outcome weights (Figure 3a).
# One call per set of weights; X is passed both as the covariates to balance
# and as the last argument (presumably the target -- confirm in
# ?standardized_mean_differences).
cb_cate_default <- standardized_mean_differences(X, D, omega_cf_default$omega, X)
cb_cate_tuned <- standardized_mean_differences(X, D, omega_cf_tuned$omega, X)

# Take the third statistic (used here as the standardized mean difference) in
# absolute value; transpose so that the covariates end up in the columns
smd_default <- t(abs(cb_cate_default[, 3, ]))
smd_tuned <- t(abs(cb_cate_tuned[, 3, ]))

# Long format: one row per (weight vector, covariate) pair, tagged by forest
df_default_long <- melt(smd_default)
df_default_long$Group <- "smd_default"
df_tuned_long <- melt(smd_tuned)
df_tuned_long$Group <- "smd_tuned"

# Stack both versions and give the columns descriptive names
df_long <- rbind(df_default_long, df_tuned_long)
colnames(df_long) <- c("Row", "Variable", "Value", "Group")

# The covariate order is reversed so that the first covariate is drawn at the
# top after coord_flip(). Axis and legend titles are removed with NULL, the
# documented way to omit them; element_blank() is a theme element, not a label.
figure3a <- ggplot(
  df_long,
  aes(x = factor(Variable, levels = rev(unique(Variable))), y = Value, fill = Group)
) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  labs(x = NULL, y = "Absolute Standardized Mean Differences") +
  scale_fill_manual(
    values = viridis(2),
    name = NULL,
    labels = c("default", "tuned")
  ) +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  coord_flip() +
  theme(legend.position = "bottom")
figure3a
```


## Instrumental forest

Run instrumental forest with the externally estimated outcome nuisance:

```{r}
# Instrumental forests that reuse the externally estimated outcome nuisance:
# default settings first, then the fully tuned version
ivf_default <- instrumental_forest(X, Y, D, Z, Y.hat = Y.hat_default)
ivf_tuned <- instrumental_forest(X, Y, D, Z, Y.hat = Y.hat_tuned, tune.parameters = "all")
```

Get the out-of-bag CLATEs:

```{r}
# CLATE point estimates of both instrumental forests
clates_default <- predict(ivf_default)[["predictions"]]
clates_tuned <- predict(ivf_tuned)[["predictions"]]
```

Use the new `get_outcome_weights()` method, which requires passing the externally estimated outcome smoother matrix:

```{r}
# Outcome weights of the instrumental forests, each paired with the smoother
# matrix of the outcome forest that produced its Y.hat
omega_if_default <- get_outcome_weights(ivf_default, S = S_default)
omega_if_tuned <- get_outcome_weights(ivf_tuned, S = S_tuned)
```

Observe that the outcome weights recover the `grf` package output:

```{r}
# Weighted outcomes vs. the forests' own predictions, default forest first;
# all.equal() compares with a numerical tolerance
cat(
  "ω'Y replicates CLATE point estimates (default)?",
  all.equal(as.numeric(omega_if_default$omega %*% Y), as.numeric(clates_default))
)
# Same comparison for the tuned forest
cat(
  "\nω'Y replicates CLATE point estimates (tuned)?",
  all.equal(as.numeric(omega_if_tuned$omega %*% Y), as.numeric(clates_tuned))
)
```

Now calculate the absolute standardized mean differences and plot them for each CLATE and covariate (Figure 3b in paper):

```{r}
# Covariate balance implied by the instrumental forest outcome weights
# (Figure 3b). One call per set of weights; X is passed both as the covariates
# to balance and as the last argument (presumably the target -- confirm in
# ?standardized_mean_differences).
cb_clate_default <- standardized_mean_differences(X, D, omega_if_default$omega, X)
cb_clate_tuned <- standardized_mean_differences(X, D, omega_if_tuned$omega, X)

# Take the third statistic (used here as the standardized mean difference) in
# absolute value; transpose so that the covariates end up in the columns
smd_default <- t(abs(cb_clate_default[, 3, ]))
smd_tuned <- t(abs(cb_clate_tuned[, 3, ]))

# Long format: one row per (weight vector, covariate) pair, tagged by forest
df_default_long <- melt(smd_default)
df_default_long$Group <- "smd_default"
df_tuned_long <- melt(smd_tuned)
df_tuned_long$Group <- "smd_tuned"

# Stack both versions and give the columns descriptive names
df_long <- rbind(df_default_long, df_tuned_long)
colnames(df_long) <- c("Row", "Variable", "Value", "Group")

# The covariate order is reversed so that the first covariate is drawn at the
# top after coord_flip(). Axis and legend titles are removed with NULL, the
# documented way to omit them; element_blank() is a theme element, not a label.
figure3b <- ggplot(
  df_long,
  aes(x = factor(Variable, levels = rev(unique(Variable))), y = Value, fill = Group)
) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  labs(x = NULL, y = "Absolute Standardized Mean Differences") +
  scale_fill_manual(
    values = viridis(2),
    name = NULL,
    labels = c("default", "tuned")
  ) +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  coord_flip() +
  theme(legend.position = "bottom")
figure3b
```


```{r, echo=F}
# This part is relevant if you run the notebooks inside the docker and want to save graphs and image in a shared host volume called shared_files (uncomment and/or adjust on demand):

# ggsave("/home/rstudio/shared_files/Figure3a.pdf", plot = figure3a, width = 6, height = 4,dpi=300)
# ggsave("/home/rstudio/shared_files/Figure3a.png", plot = figure3a, width = 6, height = 4,dpi=800)
# ggsave("/home/rstudio/shared_files/Figure3b.pdf", plot = figure3b, width = 6, height = 4,dpi=300)
# ggsave("/home/rstudio/shared_files/Figure3b.png", plot = figure3b, width = 6, height = 4,dpi=800)
# save.image(file = "/home/rstudio/shared_files/Application_401k_heterogeneous.RData")
```


# 2. Supplementary results

## Plot results

Here we observe that the default effects not only show much worse balancing, but also lead to implausibly high variance in their estimates.

```{r}
# Stack the four vectors of point estimates and label each one by
# estimator/implementation; `each` relies on all four vectors having the same length
data <- data.frame(
  value = c(cates_default, cates_tuned, clates_default, clates_tuned),
  category = rep(
    c("grf CATEs default", "grf CATEs tuned", "grf CLATEs default", "grf CLATEs tuned"),
    each = length(cates_default)
  )
)

# Horizontal boxplots of the estimates with a reference line at zero
ggplot(data, aes(x = value, y = category, fill = category)) +
  geom_boxplot(alpha = 0.7) +
  geom_vline(xintercept = 0, linetype = "solid", color = "black") +
  labs(x = "Estimate", y = "Estimator/Implementation") +
  theme_minimal() +
  theme(legend.position = "none")
```


```{r}
# Ridge plot: one density of the point estimates per estimator/implementation
ggplot(data, aes(y = category, x = value, fill = category)) +
  geom_density_ridges(scale = 1, alpha = 0.7) +
  labs(x = "Estimate", y = "Estimator/Implementation") +
  theme_minimal() +
  theme(legend.position = "none")
```


## Summarize weights

Finally, we summarize the weight vectors of each C(L)ATE by plotting the following descriptives:

- Minimum weight

- Maximum weight

- % Negative

- Sum largest 10%

- Sum of weights

- Sum of absolute weights

The sum of weights is especially interesting in light of the paper. It shows that tuning also produces weight sums closer to one in this data set. In addition, negative weights are fewer and less pronounced for the tuned version:

```{r}
# Summarise the outcome weights of all four forests. Judging from the indexing
# below, summary() returns a 3D array indexed as
# [1 = untreated / 2 = treated, weight vector, descriptive] -- confirm in the
# OutcomeWeights documentation.
summary_weights_cf_default <- summary(omega_cf_default, quiet = TRUE)
summary_weights_cf_tuned <- summary(omega_cf_tuned, quiet = TRUE)
summary_weights_if_default <- summary(omega_if_default, quiet = TRUE)
summary_weights_if_tuned <- summary(omega_if_tuned, quiet = TRUE)

# Collect the arrays under their plot labels so that every forest is handled
# by the same code instead of eight hand-copied vectors
summary_weights <- list(
  "CF default" = summary_weights_cf_default,
  "CF tuned" = summary_weights_cf_tuned,
  "IF default" = summary_weights_if_default,
  "IF tuned" = summary_weights_if_tuned
)

# Names of the descriptives (third array dimension), used as x-axis titles.
# Loop bound and labels now come from the same array.
stat_names <- dimnames(summary_weights_cf_default)[[3]]

# One figure per descriptive; seq_len() skips the loop if there is none
for (i in seq_len(dim(summary_weights_cf_default)[3])) {
  # Build one data frame per forest for descriptive i and stack them
  df_weights <- do.call(rbind, lapply(names(summary_weights), function(group) {
    untreated <- summary_weights[[group]][1, , i]
    treated <- summary_weights[[group]][2, , i]
    data.frame(
      Value = c(untreated, treated),
      Group = group,
      Treatment = rep(
        c("Untreated", "Treated"),
        times = c(length(untreated), length(treated))
      )
    )
  }))
  df_weights$Group <- factor(df_weights$Group)
  df_weights$Treatment <- factor(df_weights$Treatment)

  # Side-by-side boxplots of treated vs. untreated for each forest
  g <- ggplot(df_weights, aes(x = Value, y = Group, fill = Treatment)) +
    geom_boxplot(position = position_dodge(width = 0.75)) +
    labs(x = stat_names[i], y = NULL) +
    theme_minimal() +
    theme(legend.position = "bottom")  # Place legend at the bottom
  print(g)
}

```

