Part three of the code for Section 3: Heterogeneity in earnings response coefficients
This markdown file contains all the code necessary to replicate the cross-validation prediction test from Section 3: Heterogeneity in earnings response coefficients of the paper "What Can Bayesian Inference Do for Accounting Research?". All the code can also be found in the repo. The repo also contains 00-utils.R,
which provides a few helper functions for graphs and tables.
Note: I used the newer cmdstanr package instead of the older rstan package because it is likely the future of the R-based Stan ecosystem. I also really like its API, which is very close to the API of the pystan package. An additional advantage (I hope) is thus that most model-fitting code should be more or less directly transferable to pystan for those who want to work in Python. Installing cmdstanr used to be tricky at times because one needs a working C++ toolchain, but it is much smoother now. Please see the cmdstanr documentation for installation instructions.
The data used here is generated via the 02-create-ERC-sample.R
script found in the repo. Here, we just load it and do some last-minute transformations like de-meaning, etc.
# Read the sample from the parquet file and, in the same pipeline, attach
# de-meaned copies of the abnormal event return and the earnings surprise.
ea_data <-
  arrow::read_parquet("../data/ea-event-returns.pqt") |>
  mutate(
    ret_dm = AbEvRet - mean(AbEvRet),
    earn_surp_dm = earn_surp - mean(earn_surp)
  )
# Show the first rows as a quick sanity check.
head(ea_data)
# A tibble: 6 x 15
ticker permno fpend_date ea_date actual_eps median_fcast_eps
<chr> <dbl> <date> <date> <dbl> <dbl>
1 001N 14504 2014-12-31 2015-02-09 0.04 0.03
2 001N 14504 2015-03-31 2015-05-05 0.06 -0.045
3 001N 14504 2015-06-30 2015-08-05 0.02 -0.015
4 001N 14504 2015-09-30 2015-11-04 -0.02 0.04
5 002T 14503 2014-06-30 2014-08-07 0.19 0.235
6 002T 14503 2014-09-30 2014-11-04 0.18 0.34
# ... with 9 more variables: num_forecasts <int>,
# two_days_bef_ea <date>, Price <dbl>, earn_surp <dbl>,
# ea_match_date <date>, AbEvRet <dbl>, firm_id <int>, ret_dm <dbl>,
# earn_surp_dm <dbl>
This creates the 10 random splits of the sample
Rows: 10
Columns: 2
$ splits <list> [<vfold_split[60624 x 6736 x 67360 x 15]>], [<vfold_~
$ id <chr> "Fold01", "Fold02", "Fold03", "Fold04", "Fold05", "Fo~
Next, loop through the 10 splits, train the OLS model on each training split, and compute predictions for the holdout part.
#' Fit the OLS earnings-response regression on one slice of data.
#'
#' Regresses `ret_dm` on `earn_surp_dm` (plus an intercept) with `lm()`.
#'
#' @param .d A data frame containing the columns `ret_dm` and `earn_surp_dm`.
#' @return The fitted `lm` object. It is returned visibly; the previous
#'   version only returned it as the invisible value of an assignment to an
#'   otherwise unused local variable.
ols_fit <- function(.d) {
  lm(ret_dm ~ earn_surp_dm, data = .d)
}
#' Predict from a fitted OLS model on new observations.
#'
#' @param .fit A fitted model as returned by `ols_fit()`.
#' @param .nd A data frame of new observations passed to `broom::augment()`
#'   as `newdata`.
#' @return The result of `broom::augment(.fit, newdata = .nd)`.
ols_pred <- function(.fit, .nd) {
  # Warnings are silenced on purpose: some fits are singular, mainly because
  # the training slice has too few observations.
  suppressWarnings(broom::augment(.fit, newdata = .nd))
}
# Firm-by-firm OLS benchmark: for every cross-validation split, fit one
# regression per firm on the training rows and predict that firm's holdout
# rows. The number of folds is taken from `folds` itself rather than being
# hard-coded, so a change in the number of splits cannot silently skip folds
# or index past the end of the split list.
ols_predictions <- vector("list", length = length(folds$splits))
for (i in seq_along(folds$splits)) {
  train_set <- analysis(folds$splits[[i]])
  holdout_set <- assessment(folds$splits[[i]])

  # One row per firm, with that firm's training observations in `data`.
  ea_data_train <-
    train_set |>
    nest(data = -c(ticker, firm_id))

  # One row per firm, with that firm's holdout observations in `test_data`.
  ea_data_test <-
    holdout_set |>
    nest(data = -c(ticker, firm_id)) |>
    rename(test_data = data)

  # The inner join keeps only firms that appear in both the training and the
  # holdout part of this split.
  data_slice <-
    ea_data_train |>
    inner_join(ea_data_test, by = c("ticker", "firm_id"))

  ols_predictions[[i]] <-
    data_slice |>
    mutate(ols_fit = map(.x = data, .f = \(.d) ols_fit(.d))) |>
    mutate(ols_pred = map2(.x = ols_fit, .y = test_data, .f = ols_pred)) |>
    select(ticker, firm_id, ols_pred) |>
    unnest(cols = c(ols_pred)) |>
    select(ticker, firm_id, ea_date, ret_dm, ols_pred = .fitted)
}
# Stack the per-fold predictions and cache them on disk.
ols_predictions <- bind_rows(ols_predictions)
write_csv(ols_predictions, file = "../out/results/ols-dump.csv")
Do the same for the Bayes model: loop through the 10 splits, train the Bayes model on each training split, and compute predictions for the holdout part.
# Print the Stan program that is used for the out-of-sample fits.
read_lines("../Stan/erc-wkinfo-priors-oos.stan") |>
  cat(sep = "\n")
// Linear model with group-specific coefficient vectors drawn from a common
// multivariate normal (non-centered parameterization), plus predictions of
// the linear predictor for a held-out test sample.
// Integer arrays use the `array[N] int` declaration syntax; the older
// `int x[N]` form is no longer accepted by current Stan releases.
data{
  int<lower=1> N;                                   // num obs
  int<lower=1> J;                                   // num groups
  int<lower=1> K;                                   // num coefficients
  array[N] int<lower=1, upper=J> GroupID;           // GroupID for obs, e.g. FirmID or Industry-YearID
  vector[N] y;                                      // Response
  matrix[N, K] x;                                   // Predictors (incl. Intercept)
  // data for the oos test
  int<lower=1> N_test;                              // num obs in test sample
  matrix[N_test, K] x_test;                         // Predictors (incl. Intercept)
  array[N_test] int<lower=1, upper=J> GroupID_test; // GroupID for test sample obs
}
parameters{
  matrix[K, J] z;                    // standard normal sampler
  cholesky_factor_corr[K] L_Omega;   // hypprior coefficient correlation
  vector<lower=0>[K] tau;            // hypprior coefficient scales
  vector[K] mu_b;                    // hypprior mean coefficients
  real<lower=0> sigma;               // error-term scale
}
transformed parameters{
  matrix[J, K] b;                    // coefficient vector (one row per group)
  // The multivariate non-centered version:
  b = (rep_matrix(mu_b, J) + diag_pre_multiply(tau,L_Omega) * z)';
}
model{
  to_vector(z) ~ normal(0, 1);
  L_Omega ~ lkj_corr_cholesky(2);
  // Priors are written out for coefficients 1 and 2 only, so the model as
  // written assumes K = 2 (intercept and one slope).
  mu_b[1] ~ normal(0, 0.1);
  mu_b[2] ~ normal(0, 40);
  sigma ~ exponential(1.0 / 0.08);   // exp: 0.08 (std (abnormal returns))
  tau[1] ~ exponential(1.0 / 0.1);   // exp: 0.1
  tau[2] ~ exponential(1.0 / 40);    // exp: 40
  y ~ normal(rows_dot_product(b[GroupID] , x), sigma);
}
generated quantities {
  // Linear predictor for the test rows, using each row's group coefficients.
  vector[N_test] y_pred = rows_dot_product(b[GroupID_test] , x_test);
}
# Create the CmdStan model object from the Stan file shown above.
model_wkinfo_priors <- cmdstan_model(
  stan_file = "../Stan/erc-wkinfo-priors-oos.stan"
)
Beware: the following code chunk can take a long time to run.
# Bayesian counterpart of the OLS loop: for every cross-validation split,
# sample the hierarchical model on the training rows and summarise the
# posterior of `y_pred` for the holdout rows. The number of folds is taken
# from `folds` itself rather than being hard-coded.
bay_predictions <- vector("list", length = length(folds$splits))
for (i in seq_along(folds$splits)) {
  train_set <- analysis(folds$splits[[i]])
  holdout_set <- assessment(folds$splits[[i]])

  # NOTE(review): the model is fit on the raw AbEvRet / earn_surp columns,
  # whereas the truth column stored below is the de-meaned ret_dm and the OLS
  # benchmark is fit on ret_dm ~ earn_surp_dm. Confirm this is intended.
  input_data <- list(
    N = nrow(train_set),
    J = max(ea_data$firm_id), # important! Needs to refer to full sample
    K = 2,
    GroupID = train_set$firm_id,
    y = train_set$AbEvRet,
    x = as.matrix(data.frame(int = 1, esurp = train_set$earn_surp)),
    N_test = nrow(holdout_set),
    x_test = as.matrix(data.frame(int = 1, esurp = holdout_set$earn_surp)),
    GroupID_test = holdout_set$firm_id
  )

  fit_wkinfo_priors <- model_wkinfo_priors$sample(
    data = input_data,
    iter_sampling = 1000,
    iter_warmup = 1000,
    chains = 4,
    parallel_chains = 4,
    seed = 1234,
    refresh = 1000
  )

  # One summary row per holdout observation (element of y_pred).
  posterior_ypred <- summarise_draws(
    fit_wkinfo_priors$draws(c("y_pred")),
    posterior_mean = mean,
    posterior_median = median,
    posterior_sd = sd,
    ~quantile2(., probs = c(0.05, 0.25, 0.75, 0.95))
  )

  # Column-bind identifiers and truth to the summaries; this relies on the
  # y_pred summaries being in the same row order as holdout_set.
  bay_predictions[[i]] <-
    cbind(
      select(holdout_set, ticker, firm_id, ea_date, ret_dm),
      posterior_ypred
    )

  # Drop the per-fold objects before the next iteration.
  rm(posterior_ypred, train_set, holdout_set, input_data)
}
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1646.5 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1835.0 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1850.0 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1888.3 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1805.0 seconds.
Total execution time: 1889.0 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1857.0 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1862.4 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1877.6 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1884.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1870.3 seconds.
Total execution time: 1884.5 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1752.9 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1844.6 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1852.3 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1897.3 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1836.8 seconds.
Total execution time: 1897.8 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1811.5 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1833.4 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1858.0 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1869.6 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1843.1 seconds.
Total execution time: 1870.0 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1656.0 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1711.2 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1817.6 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1855.2 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1760.0 seconds.
Total execution time: 1855.8 seconds.
Running MCMC with 4 parallel chains...
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1647.9 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1805.2 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1872.3 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1895.3 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1805.2 seconds.
Total execution time: 1895.9 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1599.3 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1619.7 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1662.0 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1752.3 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1658.3 seconds.
Total execution time: 1752.7 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1818.5 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1832.0 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1898.6 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1959.4 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1877.1 seconds.
Total execution time: 1959.9 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1738.1 seconds.
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1795.9 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1858.2 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1860.0 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1813.1 seconds.
Total execution time: 1860.5 seconds.
Running MCMC with 4 parallel chains...
Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 4 Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 3 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4 finished in 1570.6 seconds.
Chain 3 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3 finished in 1578.9 seconds.
Chain 2 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2 finished in 1814.2 seconds.
Chain 1 Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1 finished in 1829.1 seconds.
All 4 chains finished successfully.
Mean chain execution time: 1698.2 seconds.
Total execution time: 1829.5 seconds.
# Stack the per-fold prediction tables into one and cache the result on disk.
bay_predictions <- bay_predictions |> bind_rows()
bay_predictions |> write_csv(file = "../out/results/bay-dump.csv")
# Re-load the cached predictions from disk.
ols_predictions <- read_csv("../out/results/ols-dump.csv")
bay_predictions <- read_csv("../out/results/bay-dump.csv")

# Panel A: MAE, RMSE and R-squared of the posterior-mean prediction against
# ret_dm, computed over all rows of bay_predictions.
tab2.A <-
  list(yardstick::mae, yardstick::rmse, yardstick::rsq) |>
  map(
    \(metric) metric(bay_predictions, truth = ret_dm, estimate = posterior_mean)
  ) |>
  bind_rows() |>
  mutate(
    case = "Bayes (Full sample)",
    .estimate = round(.estimate, 4)
  ) |>
  select(-.estimator) |>
  pivot_wider(names_from = .metric, values_from = .estimate)
tab2.A$N <- nrow(bay_predictions)
kable(tab2.A)
case | mae | rmse | rsq | N |
---|---|---|---|---|
Bayes (Full sample) | 0.0575 | 0.0768 | 0.0339 | 67360 |
# Keep only the observations for which an OLS prediction exists as well, so
# both models are scored on the same rows.
both_predictions <- inner_join(
  ungroup(bay_predictions),
  select(ols_predictions, ticker, ea_date, ols_pred),
  by = c("ticker", "ea_date")
)

# Panel B: the same three metrics for the Bayes posterior mean and for the
# OLS prediction, one table row per model.
tab2.B <-
  bind_rows(
    yardstick::mae(both_predictions, truth = ret_dm, estimate = posterior_mean),
    yardstick::rmse(both_predictions, truth = ret_dm, estimate = posterior_mean),
    yardstick::rsq(both_predictions, truth = ret_dm, estimate = posterior_mean),
    yardstick::mae(both_predictions, truth = ret_dm, estimate = ols_pred),
    yardstick::rmse(both_predictions, truth = ret_dm, estimate = ols_pred),
    yardstick::rsq(both_predictions, truth = ret_dm, estimate = ols_pred)
  ) |>
  mutate(
    case = rep(c("Bayes (OLS sample)", "OLS"), each = 3),
    .estimate = round(.estimate, 4)
  ) |>
  select(-.estimator) |>
  pivot_wider(names_from = .metric, values_from = .estimate)
tab2.B$N <- nrow(both_predictions)
kable(tab2.B)
case | mae | rmse | rsq | N |
---|---|---|---|---|
Bayes (OLS sample) | 0.0575 | 0.0767 | 0.0339 | 67031 |
OLS | 0.0669 | 0.4048 | 0.0000 | 67031 |
# Stack both panels and save the combined table.
write_csv(bind_rows(tab2.A, tab2.B), "../out/results/tab2.csv")
If you see mistakes or want to suggest changes, please create an issue on the source repository.
Text and figures are licensed under Creative Commons Attribution CC BY 4.0. Source code is available at https://github.com/hschuett/BayesForAccountingResearch, unless otherwise noted. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".