Table of Contents
The methods explanations throughout the notebook are kept short. Please see the paper for more details regarding the decomposition parameters and their estimation.
Click on the “Code” button on the top right of this notebook to “Hide All Code” and see only the results or “Download Rmd” to extract the underlying code.
Load packages, project specific functions, and set seed:
# Load required libraries
library(sas7bdat)
library(causalDML)
library(tidyverse)
library(grf)
library(patchwork)
library(dplyr)
library(knitr)
library(viridis)
# Load project specific functions
source("/home/rstudio/code/probs_all_v1.R")
# Set seed
seed = 1234
set.seed(seed)
Load the data that were obtained from impact.sas7bdat, baseline.sas7bdat, key_vars.sas7bdat, and mileston.sas7bdat files from OPENICPSR on March 24, 2025 (usually takes some minutes).
# Load datasets downloaded from https://www.openicpsr.org/openicpsr/project/113269/version/V1/view
impact_raw = read.sas7bdat("/home/rstudio/raw_data/impact.sas7bdat")
baseline_raw = read.sas7bdat("/home/rstudio/raw_data/baseline.sas7bdat")
key_vars_raw = read.sas7bdat("/home/rstudio/raw_data/key_vars.sas7bdat")
mileston_raw = read.sas7bdat("/home/rstudio/raw_data/mileston.sas7bdat")
rand_raw = read.sas7bdat("/home/rstudio/raw_data/rand_dat.sas7bdat")
Prepare data for analysis:
# Extract the relevant variables
impact = select(impact_raw,MPRID,EARNY4,EVERJCH)
impact_temp = select(impact_raw,MPRID,JCVCLERC,JCVHLTH,JCVAUTO,JCVWELD,JCVELTRC,JCVCSTRC,JCVFOOD,JCVETRNC,JCVOTH)
baseline = select(baseline_raw,MPRID,RACE_ETH,HS_D,GED_D,VOC_D,ANY_ED1,HGC,NTV_LANG,WKEARNR,
HASCHLD,MARRIAGE,R_HEAD,HHMEMB,HEALTH,PY_CIG,PY_ALCHL,
PY_POT,HADWORRY,HEAR_JC,KNEWCNTR,E_MATH,E_READ,E_ALONG,
E_CONTRL,E_ESTEEM,E_SPCJOB,E_FRIEND,KNEW_JC,EARN_YR)
key_vars = select(key_vars_raw,MPRID,TREATMNT,FEMALE,AGE_CAT,EDUC_GR,NONRES,IN57)
mileston = select(mileston_raw,MPRID,LIVESPOU,EVERWORK,YR_WORK,CURRJOB,
JOB0_3, JOB3_9, JOB9_12, MOSTWELF,GOT_AFDC,GOT_FS,ED0_6,ED6_12,
PUBLICH,BADHLTH,HARDUSE,POTUSE,EVARRST,PMSA,MSA,PRARRI)
rand = select(rand_raw,MPRID,RAND_WK)
# Code the versions
versions = rep("Nothing",nrow(impact))
versions[impact_temp[,2] == 1] = "Clerical"
versions[impact_temp[,3] == 1] = "Health"
versions[impact_temp[,4] == 1] = "Auto"
versions[impact_temp[,5] == 1] = "Welding"
versions[impact_temp[,6] == 1 | impact_temp[,9] == 1] = "Electrical"
versions[impact_temp[,7] == 1] = "Construction"
versions[impact_temp[,8] == 1] = "Food"
versions[impact_temp[,10] == 1] = "Other"
versions[rowSums(impact_temp[,2:10],na.rm=T) > 1] = "Multiple"
impact = cbind(impact,versions)
# Merge the data
inner_join(impact,baseline,by="MPRID") %>% inner_join(key_vars,by="MPRID") %>% inner_join(mileston,by="MPRID") %>% inner_join(rand,by="MPRID") %>%
mutate(RACE_W = as.numeric(RACE_ETH == 1)) %>% mutate(RACE_B = as.numeric(RACE_ETH == 2)) %>%
mutate(RACE_H = as.numeric(RACE_ETH == 3)) %>% mutate(RACE_O = as.numeric(RACE_ETH == 4)) %>%
mutate(WKEARNR = replace(WKEARNR,is.na(WKEARNR),0)) %>%
mutate(EARN_YR = replace(EARN_YR,is.na(EARN_YR),0)) -> db
# Remove observations with missing values
db %>% na.omit() -> db
# Create main variables
Y = as.matrix(select(db,EARNY4))
D = as.matrix(select(db,TREATMNT))
# Heterogeneity variable
female = select(db,FEMALE)
fem_mat = cbind(1-female,female)
label_fem = c("Male","Female")
colnames(fem_mat) = label_fem
female = factor(ifelse(female == 1, "female", "male"),
levels = c("male", "female"))
# Create versions
JC = db$EVERJCH
Tr = rep(NA,length(Y))
Tr[D == 0] = "Control"
Tr[D == 1 & JC == 0] = "No JC"
Tr[D == 1 & JC == 1 & db$versions == "Clerical"] = "Clerical"
Tr[D == 1 & JC == 1 & db$versions == "Health"] = "Health"
Tr[D == 1 & JC == 1 & db$versions == "Auto"] = "Auto"
Tr[D == 1 & JC == 1 & db$versions == "Welding"] = "Welding"
Tr[D == 1 & JC == 1 & db$versions == "Electrical"] = "Electrical"
Tr[D == 1 & JC == 1 & db$versions == "Construction"] = "Construction"
Tr[D == 1 & JC == 1 & db$versions == "Food"] = "Food"
Tr[D == 1 & JC == 1 & db$versions == "Other"] = "Other"
Tr[D == 1 & JC == 1 & db$versions == "Multiple"] = "Multiple"
Tr[is.na(Tr)] = "JC w/o voc"
label_T = c("Control","No JC","JC w/o voc","Clerical","Health","Auto","Welding","Electrical",
"Construction","Food","Other","Multiple")
Tr = factor(Tr,levels=label_T)
# Create control matrix
X = as.matrix(select(db,FEMALE,AGE_CAT,RACE_W,RACE_B,RACE_H,RACE_O,EDUC_GR,
LIVESPOU,EVERWORK,YR_WORK,CURRJOB,JOB0_3, JOB3_9, JOB9_12,
MOSTWELF,GOT_AFDC,GOT_FS,ED0_6,ED6_12,PUBLICH,BADHLTH,HARDUSE,POTUSE,EVARRST,PMSA,MSA,
HS_D,GED_D,VOC_D,ANY_ED1,HGC,NTV_LANG,WKEARNR,
HASCHLD,MARRIAGE,R_HEAD,HHMEMB,HEALTH,PY_CIG,PY_ALCHL,
PY_POT,HADWORRY,HEAR_JC,KNEWCNTR,E_MATH,E_READ,E_ALONG,
E_CONTRL,E_ESTEEM,E_SPCJOB,E_FRIEND,KNEW_JC,NONRES,PRARRI,EARN_YR,RAND_WK,IN57))
The nuisance parameters are obtained using the multi-valued treatment effect implementation of the causalDML package. For the outcome models, we switch off the honesty option, because sample size in some effective treatments is too small to tune parameters with honesty (ran roughly half an hour).
# Define methods
forest_hon = create_method("forest_grf",args=list(tune.parameters = "all")) # for pscores
forest = create_method("forest_grf",args=list(honesty = F,tune.parameters = "all")) # for outcome b/c too few observations for honest RF
# Run multiple treatment model to get nuisance parameters
aipw = DML_aipw(Y,Tr,X,
ml_w=list(forest_hon),
ml_y=list(forest),
quiet=T, # Switch off to see progress
cf = 5)
Run the decomposition:
# Run HK decomp
results = HK2_decomposition(Y,D,female,
aipw$APO$w_mat,
aipw$APO$e_mat,
aipw$APO$m_mat)
# Print the setting
results
================= Setting Summary =================
Effective treatments: Control
Aggregated into: 0
Effective treatments: No JC, JC w/o voc, Clerical, Health, Auto, Welding, Electrical, Construction, Food, Other, Multiple
Aggregated into: 1
Heterogeneity variables: Unconditional, male, female
Print, plot and save results for DiM:
g = plot(results,levels = T, pe_digits = 1,decomposition = "dim")
g
ggsave("/home/rstudio/results/DiM.png",g,width = 7,height=4)
Delta_dim = summary(results,parameter = "dim", t_aggregate = c(3,2),x_aggregate = c(3,2)) # Female - Male
================= HK decomposition results for =================
Parameters dim
Treatment aggregation contrast: 1 - 0
Heterogeneity contrast: female - male
Estimate SE t-value p-value
DiM -7.61422 7.81877 -0.9738 0.34666
Δ0 0.00000 0.00000 0.0000 1.00000
Δ1 -0.33622 7.68273 -0.0438 0.96571
Δ2 -6.16894 2.33089 -2.6466 0.01916 *
Δ3 -0.40977 0.51961 -0.7886 0.44349
Δ4 -0.69929 2.70445 -0.2586 0.79973
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Print, plot and save results for adjusted DiM:
g = plot(results,levels = T, pe_digits = 1,decomposition = "adim")
g
ggsave("/home/rstudio/results/ADiM.png",g,width = 7,height=4)
summary(results,parameter = "adim", t_aggregate = c(3,2),x_aggregate = c(3,2)) # Female - Male
================= HK decomposition results for =================
Parameters adim
Treatment aggregation contrast: 1 - 0
Heterogeneity contrast: female - male
Estimate SE t-value p-value
ADiM -7.993378 7.485126 -1.0679 0.30363
Δ0 0.000000 0.000000 0.0000 1.00000
Δ1 -0.336222 7.682731 -0.0438 0.96571
Δ2 -6.168936 2.330894 -2.6466 0.01916 *
Δ3 -0.409768 0.519608 -0.7886 0.44349
Δ4' -1.090668 1.566117 -0.6964 0.49757
Δ5 0.012215 0.053829 0.2269 0.82377
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Create the condensed figure in the introduction:
res = Delta_dim[3:6,c(1,4)]
# Original labels
base_labels <- c("Effect heterogeneity",
"Group targeting of average outcomes",
"Group targeting of group outcomes",
"Individualized targeting")
# Construct labels with p-values (rounded to 3 decimal places)
full_labels <- paste0(base_labels, " (p-value: ", sprintf("%.3f", res[,2]), ")")
# Create factor with reversed order to preserve top-to-bottom layout
df <- data.frame(
label = factor(full_labels, levels = rev(full_labels)),
estimate = res[,1]
)
# Plot without confidence intervals
g = ggplot(df, aes(x = estimate, y = label)) +
geom_bar(stat = "identity", fill = viridis(1,alpha=0.9, begin=0.3)) +
geom_vline(xintercept = 0) +
labs(x = "Estimate in $", y = "") +
theme_minimal() +
theme(axis.text.y = element_text(size = 10)) + xlim(-8,0)
g
ggsave("/home/rstudio/results/Intro_graph_p.png",g,width = 7,height=2)
Save image.
save.image("/home/rstudio/results/Image.RData")
We plot fractions of effective treatments by gender to illustrate how different the Job Corps experience is with respect to vocational training received:
g1 = data.frame(x=factor(female), Version=factor(Tr,levels=rev(label_T))) %>%
ggplot(aes(x=x, fill=Version))+geom_bar(position = "fill", colour="black",linewidth=0.01) +
ylab("Fraction") + scale_fill_grey(start = 0, end = 1) + theme_bw() +
theme(axis.title.x = element_blank())
g2 = data.frame(x=factor(female), Version=factor(Tr,levels=rev(label_T))) %>%
filter(Version != "Control") %>% filter(Version != "No JC") %>% filter(Version != "JC without voc") %>%
ggplot(aes(x=x, fill=Version)) + geom_bar(position = "fill", colour="black",linewidth=0.01) +
ylab("Fraction conditional on vocational training") + scale_fill_grey(start = 0, end = 0.9) +
theme_bw() + theme(axis.title.x = element_blank(), legend.position = "none")
g = g1 + g2
g
See also the numbers here:
# Create a data frame with the relevant variables
df = data.frame(female = female, Tr = Tr)
# Compute the percentage shares
tab_female = prop.table(table(df$Tr[df$female == "female"])) * 100
tab_male = prop.table(table(df$Tr[df$female == "male"])) * 100
tab_all = prop.table(table(df$Tr)) * 100
# Combine the results into a table with formatted percentages
table_shares = cbind(
Female = sprintf("%1.1f", tab_female),
Male = sprintf("%1.1f", tab_male),
All = sprintf("%1.1f", tab_all)
)
# Ensure the row names are consistent with the overall table
rownames(table_shares) = names(tab_all)
# Print the table as a data frame
as.data.frame(table_shares)
The main text discusses how the weak effective overlap assumption A.4 can be inspected by checking scaled propensity scores. Here we plot the scaled distributions:
# Convert the matrix to a data frame and add an identifier for rows
df <- as.data.frame(aipw$APO$e_mat[,-1]) %>%
mutate(id = row_number()) %>%
pivot_longer(cols = -id, names_to = "variable", values_to = "value") %>%
group_by(variable) %>%
# Divide each value by the mean of its respective column
mutate(norm_value = value / mean(value)) %>%
ungroup()
# Plot histograms with density on the y-axis and shared x-axis across facets
ggplot(df, aes(x = norm_value)) +
geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "blue", color = "black") +
facet_wrap(~ variable) + # no scales = "free_x"
labs(x = "Scaled p-score", y = "Density") +
theme_minimal()
The minimum scaled propensity score is 0.21 and therefore much larger than what usually would be considered as problematic in standard settings. For most of the trainings, we observe a bimodal shape. This is because gender is the most important predictor of vocational training and random forests are capable of capturing this pattern.
Using sampling weights to account for the not completely random sampling procedure of the Job Corps evaluation shows no qualitative differences to main results:
# Create sampling weights
sampling_weights = props_all(db)$weights_Rx
# For comparison purposes run with sampling weights
resultsw = HK2_decomposition(Y,D,female,
aipw$APO$w_mat,
aipw$APO$e_mat,
aipw$APO$m_mat,
sampling_weights = sampling_weights)
g = plot(resultsw,levels = T, pe_digits = 1,decomposition = "dim",
t_aggregate = c(3,2),x_aggregate = c(3,2))
g
ggsave("/home/rstudio/results/DiMw.png",g,width = 7,height=4)
g = plot(resultsw,levels = T, pe_digits = 1,decomposition = "adim",
t_aggregate = c(3,2),x_aggregate = c(3,2))
g
ggsave("/home/rstudio/results/ADiMw.png",g,width = 7,height=4)
summary(resultsw,parameter = "dim", t_aggregate = c(3,2),x_aggregate = c(3,2))
================= HK decomposition results for =================
Parameters dim
Treatment aggregation contrast: 1 - 0
Heterogeneity contrast: female - male
Estimate SE t-value p-value
DiM -5.96499 7.77516 -0.7672 0.45572
Δ0 0.00000 0.00000 0.0000 1.00000
Δ1 0.75562 7.88379 0.0958 0.92500
Δ2 -5.89218 2.35030 -2.5070 0.02512 *
Δ3 -0.26037 0.41460 -0.6280 0.54011
Δ4 -0.11920 2.73299 -0.0436 0.96583
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(resultsw,parameter = "adim", t_aggregate = c(3,2),x_aggregate = c(3,2))
================= HK decomposition results for =================
Parameters adim
Treatment aggregation contrast: 1 - 0
Heterogeneity contrast: female - male
Estimate SE t-value p-value
ADiM -7.0276905 7.4421474 -0.9443 0.36102
Δ0 0.0000000 0.0000000 0.0000 1.00000
Δ1 0.7556183 7.8837863 0.0958 0.92500
Δ2 -5.8921751 2.3502972 -2.5070 0.02512 *
Δ3 -0.2603703 0.4145969 -0.6280 0.54011
Δ4' -1.1843630 1.5535925 -0.7623 0.45851
Δ5 -0.0071592 0.0646172 -0.1108 0.91335
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
The decomposition indicates that there is no real effect heterogeneity at the level of the effective treatment. However, \(\Delta_1\) is only a necessary test for strong group effect homogeneity as discussed in Section 4.6 of the paper. Thus, we investigate effect heterogeneity for each effective treatment to see whether significant effect heterogeneity is masked by the decomposition estimand.
First, we estimate group average treatment effect (GATE) differences, i.e. \(E[Y(t)-Y(0)|female] - E[Y(t)-Y(0)|male]\), for each treatment version:
# Initialize DiM for each effective treatment and IFs for inference later
n = length(Y)
J = length(label_T)-1
G = (female == "female")*1
dGATEs = matrix(NA,J,5)
rownames(dGATEs) = label_T[-1]
colnames(dGATEs) = c("GATE","S.E.","t-value","pointwise p-value","uniform p-value")
IFs = matrix(NA,length(Y),J)
for (j in 1:J) {
dGATEs[j,1] = mean(aipw$ATE$delta[,j] * G / mean(G) - aipw$ATE$delta[,j] * (1-G) / mean(1-G))
IFs[,j] = aipw$ATE$delta[,j] * G / mean(G) - aipw$ATE$delta[,j] * (1-G) / mean(1-G) - dGATEs[j]
}
# Estimate the covariance matrix via IF
Sigma_hat = var(IFs)
# Collect point estimates and SEs
dGATEs[,2] = sqrt(diag(Sigma_hat) / n)
dGATEs[,3] = dGATEs[,1] / dGATEs[,2]
dGATEs[,4] = 2 * (1 - pt(abs(dGATEs[,3]), n))
sup_t = function(dimj0,Ytilde,G,alpha = 0.05,reps = 10000) {
n = nrow(Ytilde)
J = ncol(Ytilde)
# browser()
largest_ts = rep(0,reps)
for (i in 1:reps) {
# Generate bootstrap indices
bootstrap_indices = sample(1:n, size = n, replace = TRUE)
# Bootstrap samples
bootstrap_Ytilde = Ytilde[bootstrap_indices,]
bootstrap_G = G[bootstrap_indices]
for (j in 1:J) {
mean_1 = mean(bootstrap_Ytilde[bootstrap_G == 1,j])
mean_0 = mean(bootstrap_Ytilde[bootstrap_G == 0,j])
dim_boot = mean_1 - mean_0
var_1 = var(bootstrap_Ytilde[bootstrap_G == 1,j])
var_0 = var(bootstrap_Ytilde[bootstrap_G == 0,j])
n_1 = sum(bootstrap_G == 1)
n_0 = sum(bootstrap_G == 0)
se_boot = sqrt(var_1 / n_1 + var_0 / n_0)
t_val = abs((dim_boot - dimj0[j]) / se_boot)
if (is.na(t_val)) next
else if (t_val > largest_ts[i]) largest_ts[i] = t_val
}
}
CV = quantile(largest_ts, probs = 1 - alpha)
return(list("CritVal" = CV, "largest_ts" = largest_ts))
}
supt = sup_t(dGATEs[,1],aipw$ATE$delta[,1:J],G)
for (j in 1:J) {
dGATEs[j,5] = mean(supt$largest_ts > abs(dGATEs[j,3]))
}
dGATEs
GATE S.E. t-value pointwise p-value uniform p-value
No JC 0.4345983 11.52285 0.0377162 0.9699147 1.0000
JC w/o voc -7.3774050 12.83993 -0.5745675 0.5655972 0.9998
Clerical -23.7203674 19.70356 -1.2038621 0.2286722 0.9358
Health 21.3105338 23.75711 0.8970169 0.3697321 0.9917
Auto 40.0176808 33.99475 1.1771724 0.2391556 0.9450
Welding 14.3231329 32.94165 0.4348031 0.6637150 1.0000
Electrical -15.6003258 46.98573 -0.3320226 0.7398793 1.0000
Construction 9.0027448 23.12807 0.3892562 0.6970952 1.0000
Food 27.7936581 24.47049 1.1358031 0.2560670 0.9558
Other 3.0522618 16.03259 0.1903786 0.8490164 1.0000
Multiple -25.7210440 18.79120 -1.3687815 0.1710993 0.8654
We observe that each estimate is imprecisely estimated and even the pointwise p-values are quite large.
Finally, we test the joint hypothesis of all GATE differences being zero using a Wald and a supremum test:
# Compute the Wald statistic and p-value
W = t(dGATEs[,1]) %*% solve(Sigma_hat / n) %*% dGATEs[,1]
W_p_value = 1 - pchisq(W, df = J)
# Compute supt p-value
supt_p_value = mean(supt$largest_ts > max(abs(dGATEs[,3])))
# Use sprintf to align numbers
output = sprintf("Wald p-value: %.4f\nSupremum p-value: %.4f", W_p_value, supt_p_value)
cat(output)
Wald p-value: 0.6942
Supremum p-value: 0.8654
Like \(\Delta_1\), the direct tests of strong group effect homogeneity are insignificant.
First plot the underlying average potential outcomes:
plot(aipw$APO)
Not surprisingly they are imprecisely estimated. However, we observe a pattern that the training in which women are overrepresented - clerical, health and food - have relatively low estimated average potential outcomes. This is driving the gender differences.
Then, we can print estimates of all parameters discussed throughout the manucript for completeness.
# Define parameters to be displays and their order
param_select = c(1,2,4,5,6,3,7,8,9,11,13,14)
First, outcome levels for men
summary(results,parameter = param_select, t_aggregate = 2,x_aggregate = 2) # A = 0
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation level: 0
Heterogeneity level: male
Estimate SE t-value p-value
CM 221.16570 3.96844 55.7312 < 2.2e-16 ***
ACM 221.91192 3.95426 56.1197 < 2.2e-16 ***
s1 221.91192 3.95426 56.1197 < 2.2e-16 ***
s2 195.78072 2.91088 67.2583 < 2.2e-16 ***
s3 221.91192 3.95426 56.1197 < 2.2e-16 ***
d0 195.78072 2.91088 67.2583 < 2.2e-16 ***
d1 26.13120 2.48265 10.5255 4.935e-08 ***
d2 0.00000 0.00000 0.0000 1.0000
d3 0.00000 0.00000 0.0000 1.0000
d4 -0.74623 0.87780 -0.8501 0.4096
d4' 0.00000 0.00000 0.0000 1.0000
d5 0.00000 0.00000 0.0000 1.0000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(results,parameter = param_select, t_aggregate = 3,x_aggregate = 2) # A = 1
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation level: 1
Heterogeneity level: male
Estimate SE t-value p-value
CM 241.074985 3.921212 61.4797 < 2.2e-16 ***
ACM 240.063266 3.805680 63.0803 < 2.2e-16 ***
s1 238.375842 3.988827 59.7609 < 2.2e-16 ***
s2 214.913643 2.870742 74.8634 < 2.2e-16 ***
s3 240.513381 3.999296 60.1389 < 2.2e-16 ***
d0 212.101366 2.746747 77.2191 < 2.2e-16 ***
d1 26.274477 2.274935 11.5495 1.526e-08 ***
d2 2.812278 1.063576 2.6442 0.01925 *
d3 -0.674739 0.853093 -0.7909 0.44218
d4 0.561604 1.275787 0.4402 0.66651
d4' -0.466254 1.124392 -0.4147 0.68467
d5 0.016139 0.034817 0.4635 0.65011
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
and their contrast
# Male contrast in treatment aggregations
summary(results,parameter = param_select, t_aggregate = c(3,2),x_aggregate = 2)
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation contrast: 1 - 0
Heterogeneity level: male
Estimate SE t-value p-value
DiM 19.909288 5.578924 3.5687 0.0030845 **
ADiM 18.151343 5.368024 3.3814 0.0044756 **
s1 16.463920 5.498158 2.9944 0.0096573 **
s2 19.132919 3.989715 4.7956 0.0002849 ***
s3 18.601458 5.505695 3.3786 0.0045006 **
δ0 16.320641 3.898079 4.1868 0.0009136 ***
δ1 0.143279 3.273949 0.0438 0.9657112
δ2 2.812278 1.063576 2.6442 0.0192468 *
δ3 -0.674739 0.853093 -0.7909 0.4421771
δ4 1.307830 1.800249 0.7265 0.4795208
δ4' -0.466254 1.124392 -0.4147 0.6846659
δ5 0.016139 0.034817 0.4635 0.6501077
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Second, outcome levels for women
summary(results,parameter = param_select, t_aggregate = 2,x_aggregate = 3) # A = 0
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation level: 0
Heterogeneity level: female
Estimate SE t-value p-value
CM 159.5534 4.4309 36.0094 3.339e-15 ***
ACM 160.5917 4.2170 38.0823 1.536e-15 ***
s1 160.5917 4.2170 38.0823 1.536e-15 ***
s2 195.7807 2.9109 67.2583 < 2.2e-16 ***
s3 160.5917 4.2170 38.0823 1.536e-15 ***
d0 195.7807 2.9109 67.2583 < 2.2e-16 ***
d1 -35.1890 3.3317 -10.5620 4.726e-08 ***
d2 0.0000 0.0000 0.0000 1.0000
d3 0.0000 0.0000 0.0000 1.0000
d4 -1.0383 1.1759 -0.8831 0.3921
d4' 0.0000 0.0000 0.0000 1.0000
d5 0.0000 0.0000 0.0000 1.0000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(results,parameter = param_select, t_aggregate = 3,x_aggregate = 3) # A = 1
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation level: 1
Heterogeneity level: female
Estimate SE t-value p-value
CM 171.848444 3.221199 53.3492 < 2.2e-16 ***
ACM 170.749687 3.234096 52.7967 < 2.2e-16 ***
s1 176.719420 3.470609 50.9188 < 2.2e-16 ***
s2 208.744708 3.099585 67.3460 < 2.2e-16 ***
s3 172.278255 3.479212 49.5165 < 2.2e-16 ***
d0 212.101366 2.746747 77.2191 < 2.2e-16 ***
d1 -35.381946 3.048336 -11.6070 1.432e-08 ***
d2 -3.356658 1.268724 -2.6457 0.01919 *
d3 -1.084507 1.371320 -0.7908 0.44222
d4 -0.429812 1.243008 -0.3458 0.73465
d4' -1.556922 1.090167 -1.4281 0.17518
d5 0.028353 0.041053 0.6907 0.50108
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
and their contrast
# Male contrast in treatment aggregations
summary(results,parameter = param_select, t_aggregate = c(3,2),x_aggregate = 3)
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation contrast: 1 - 0
Heterogeneity level: female
Estimate SE t-value p-value
DiM 12.295069 5.478033 2.2444 0.0414871 *
ADiM 10.157964 5.216458 1.9473 0.0718435 .
s1 16.127698 5.368436 3.0042 0.0094729 **
s2 12.963983 4.154303 3.1206 0.0075181 **
s3 11.686533 5.369378 2.1765 0.0471270 *
δ0 16.320641 3.898079 4.1868 0.0009136 ***
δ1 -0.192943 4.408783 -0.0438 0.9657111
δ2 -3.356658 1.268724 -2.6457 0.0191897 *
δ3 -1.084507 1.371320 -0.7908 0.4422242
δ4 0.608536 2.018202 0.3015 0.7674475
δ4' -1.556922 1.090167 -1.4281 0.1751764
δ5 0.028353 0.041053 0.6907 0.5010756
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
For completeness also check the unconditional estimands that naturally do not represent a heterogeneity but rather decompose average estimands for average potential outcomes:
# Unconditional levels
summary(results,parameter = param_select, t_aggregate = 2,x_aggregate = 1) # A = 0
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation level: 0
Heterogeneity level: Unconditional
Estimate SE t-value p-value
CM 197.63124 3.01814 65.4812 < 2e-16 ***
ACM 195.78072 2.91088 67.2583 < 2e-16 ***
s1 195.78072 2.91088 67.2583 < 2e-16 ***
s2 195.78072 2.91088 67.2583 < 2e-16 ***
s3 195.78072 2.91088 67.2583 < 2e-16 ***
d0 195.78072 2.91088 67.2583 < 2e-16 ***
d1 0.00000 0.00000 0.0000 1.00000
d2 0.00000 0.00000 0.0000 1.00000
d3 0.00000 0.00000 0.0000 1.00000
d4 1.85052 0.77287 2.3943 0.03121 *
d4' 0.00000 0.00000 0.0000 1.00000
d5 0.00000 0.00000 0.0000 1.00000
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(results,parameter = param_select, t_aggregate = 3,x_aggregate = 1) # A = 1
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation level: 1
Heterogeneity level: Unconditional
Estimate SE t-value p-value
CM 209.516178 2.629379 79.6828 < 2e-16 ***
ACM 210.525742 2.605745 80.7929 < 2e-16 ***
s1 212.101366 2.746747 77.2191 < 2e-16 ***
s2 212.101366 2.746747 77.2191 < 2e-16 ***
s3 212.101366 2.746747 77.2191 < 2e-16 ***
d0 212.101366 2.746747 77.2191 < 2e-16 ***
d1 0.000000 0.000000 0.0000 1.00000
d2 0.000000 0.000000 0.0000 1.00000
d3 0.000000 0.000000 0.0000 1.00000
d4 -2.585188 1.249083 -2.0697 0.05746 .
d4' -1.781477 1.158354 -1.5379 0.14635
d5 0.205853 0.074312 2.7701 0.01504 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Similarly for the average treatment effect:
summary(results,parameter = param_select, t_aggregate = c(3,2),x_aggregate = 1) # A = 1 - A = 0
================= HK decomposition results for =================
Parameters 1, 2, 4, 5, 6, 3, 7, 8, 9, 11, 13, 14
Treatment aggregation contrast: 1 - 0
Heterogeneity level: Unconditional
Estimate SE t-value p-value
DiM 11.884936 4.002846 2.9691 0.0101541 *
ADiM 14.745017 3.799012 3.8813 0.0016619 **
s1 16.320641 3.898079 4.1868 0.0009136 ***
s2 16.320641 3.898079 4.1868 0.0009136 ***
s3 16.320641 3.898079 4.1868 0.0009136 ***
δ0 16.320641 3.898079 4.1868 0.0009136 ***
δ1 0.000000 0.000000 0.0000 1.0000000
δ2 0.000000 0.000000 0.0000 1.0000000
δ3 0.000000 0.000000 0.0000 1.0000000
δ4 -4.435705 1.637275 -2.7092 0.0169484 *
δ4' -1.781477 1.158354 -1.5379 0.1463543
δ5 0.205853 0.074312 2.7701 0.0150384 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1