#----------------------------------------------------------------------------------------------------
# title: "Combination of inflammatory and vascular markers in the febrile phase of dengue is associated with more severe outcomes"
# author: "Nguyen Lam Vuong"
# date: "29-Jul-2021"
# SOME FUNCTIONS FOR USE IN THE ANALYSIS

#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
# Load packages
library(tidyverse)
library(MuMIn)
source("Elife ERA functions.R") # to include my functions
options(na.action = "na.fail") # for 'dredge' function [MuMIn]

# Load data
dat <- read_csv("Dengue_Biomarkers_data_27Jul2021.csv") %>% 
  filter(Time == "Enrolment") %>%
  mutate(VCAM = ifelse(uVCAM==1, log2(0.028), log2(VCAM1)),
         SDC = log2(SDC1),
         Ang = ifelse(uAng==1, log2(17.1), log2(Ang2)),
         IL8 = ifelse(uIL8==1, log2(1.8), log2(IL8)),
         IP10 = ifelse(uIP10==1, log2(1.18), log2(IP10)),
         IL1RA = ifelse(uIL1RA==1, log2(18), log2(IL1RA)),
         CD163 = log2(CD163),
         TREM = ifelse(uTREM==1, log2(10.65), log2(TREM1)),
         Fer = log2(Fer),
         CRP = log2(CRP), 
         Vir = log10(Viremia)) %>%
  select(Code, Age, age15, sev.or.inte, VCAM, SDC, Ang, IL8, IP10, IL1RA, CD163, TREM, Fer, CRP)

dat1 <- dat %>% filter(age15 == "No") ## Data for children
dat2 <- dat %>% filter(age15 == "Yes") ## Data for adults

#----------------------------------------------------------------------------------------------------
## The following codes were modified from Heinze G. et al (https://doi.org/10.1002/bimj.201700067)

#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
# FOR CHILDREN

#----------------------------------------------------------------------------------------------------
# Initial setup

pred <- c("VCAM", "SDC", "Ang", "IL8", "IP10", "IL1RA", "CD163", "TREM", "Fer", "CRP")
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat1, x=T, y=T)
full_est <- coef(full_mod)
pred_name <- names(full_est)[-1]
bootnum <- 1000

set.seed(51086)

#----------------------------------------------------------------------------------------------------
# Bootstrap for best of all combinations for children
boot_varc <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_estc <-  boot_sec <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_secl <- dredge(boot_m, rank="AIC")
  boot_varc_tmp <- attr(get.models(boot_secl, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_varc)-1)) {
    boot_varc[i,j] <- ifelse(names(boot_varc[i,j]) %in% boot_varc_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_varc[i,][boot_varc[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_varc[i, ncol(boot_varc)] <- AIC(boot_mod)
  boot_estc[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_sec[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 2 variables for children

boot_var2c <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est2c <-  boot_se2c <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se2cl <- dredge(boot_m, rank="AIC", m.lim=c(2,2))
  boot_var2c_tmp <- attr(get.models(boot_se2cl, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var2c)-1)) {
    boot_var2c[i,j] <- ifelse(names(boot_var2c[i,j]) %in% boot_var2c_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var2c[i,][boot_var2c[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var2c[i, ncol(boot_var2c)] <- AIC(boot_mod)
  boot_est2c[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se2c[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 3 variables for children

boot_var3c <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est3c <-  boot_se3c <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se3cl <- dredge(boot_m, rank="AIC", m.lim=c(3,3))
  boot_var3c_tmp <- attr(get.models(boot_se3cl, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var3c)-1)) {
    boot_var3c[i,j] <- ifelse(names(boot_var3c[i,j]) %in% boot_var3c_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var3c[i,][boot_var3c[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var3c[i, ncol(boot_var3c)] <- AIC(boot_mod)
  boot_est3c[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se3c[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 4 variables for children

boot_var4c <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est4c <-  boot_se4c <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se4cl <- dredge(boot_m, rank="AIC", m.lim=c(4,4))
  boot_var4c_tmp <- attr(get.models(boot_se4cl, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var4c)-1)) {
    boot_var4c[i,j] <- ifelse(names(boot_var4c[i,j]) %in% boot_var4c_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var4c[i,][boot_var4c[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var4c[i, ncol(boot_var4c)] <- AIC(boot_mod)
  boot_est4c[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se4c[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 5 variables for children

boot_var5c <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est5c <-  boot_se5c <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se5cl <- dredge(boot_m, rank="AIC", m.lim=c(5,5))
  boot_var5c_tmp <- attr(get.models(boot_se5cl, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var5c)-1)) {
    boot_var5c[i,j] <- ifelse(names(boot_var5c[i,j]) %in% boot_var5c_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var5c[i,][boot_var5c[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var5c[i, ncol(boot_var5c)] <- AIC(boot_mod)
  boot_est5c[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se5c[i, names(coef(summary(boot_mod)))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
# FOR ADULTS

#----------------------------------------------------------------------------------------------------
# Initial setup

pred <- c("VCAM", "SDC", "Ang", "IL8", "ns1(IP10)", "IL1RA", "CD163", "TREM", "Fer", "CRP")
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat2, x=T, y=T)
full_est <- coef(full_mod)
pred_name <- names(full_est)[-1]
bootnum <- 1000

set.seed(51086)

#----------------------------------------------------------------------------------------------------
# Bootstrap for best of all combinations for adults

boot_vara <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_esta <-  boot_sea <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_seal <- dredge(boot_m, rank="AIC")
  boot_vara_tmp <- attr(get.models(boot_seal, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_vara)-1)) {
    boot_vara[i,j] <- ifelse(names(boot_vara[i,j]) %in% boot_vara_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_vara[i,][boot_vara[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_vara[i, ncol(boot_vara)] <- AIC(boot_mod)
  boot_esta[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_sea[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 2 variables for adults

boot_var2a <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est2a <-  boot_se2a <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se2al <- dredge(boot_m, rank="AIC", m.lim=c(2,2))
  boot_var2a_tmp <- attr(get.models(boot_se2al, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var2a)-1)) {
    boot_var2a[i,j] <- ifelse(names(boot_var2a[i,j]) %in% boot_var2a_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var2a[i,][boot_var2a[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var2a[i, ncol(boot_var2a)] <- AIC(boot_mod)
  boot_est2a[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se2a[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 3 variables for adults

boot_var3a <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est3a <-  boot_se3a <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se3al <- dredge(boot_m, rank="AIC", m.lim=c(3,3))
  boot_var3a_tmp <- attr(get.models(boot_se3al, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var3a)-1)) {
    boot_var3a[i,j] <- ifelse(names(boot_var3a[i,j]) %in% boot_var3a_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var3a[i,][boot_var3a[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var3a[i, ncol(boot_var3a)] <- AIC(boot_mod)
  boot_est3a[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se3a[i, names(coef(boot_mod))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 4 variables for adults

boot_var4a <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est4a <-  boot_se4a <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se4al <- dredge(boot_m, rank="AIC", m.lim=c(4,4))
  boot_var4a_tmp <- attr(get.models(boot_se4al, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var4a)-1)) {
    boot_var4a[i,j] <- ifelse(names(boot_var4a[i,j]) %in% boot_var4a_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var4a[i,][boot_var4a[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var4a[i, ncol(boot_var4a)] <- AIC(boot_mod)
  boot_est4a[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se4a[i, names(coef(summary(boot_mod)))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Bootstrap for best combination of 5 variables for adults

boot_var5a <- matrix(0, ncol = length(pred) + 1, nrow = bootnum, dimnames = list(NULL, c(pred, "aic")))
boot_est5a <-  boot_se5a <- matrix(0, ncol = length(pred_name) + 1, nrow = bootnum,
                               dimnames = list(NULL, c("(Intercept)", pred_name)))

for (i in 1:bootnum) {
  data_id <- sample(1:dim(dat1)[1], replace = T)
  dat_id <- dat1[data_id, ]
  boot_m <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat_id, x=T, y=T)
  boot_se5al <- dredge(boot_m, rank="AIC", m.lim=c(5,5))
  boot_var5a_tmp <- attr(get.models(boot_se5al, 1)[[1]]$terms, "term.labels")
  
  for (j in 1:(ncol(boot_var5a)-1)) {
    boot_var5a[i,j] <- ifelse(names(boot_var5a[i,j]) %in% boot_var5a_tmp, 1, 0)
  }
  
  formula <- paste("sev.or.inte~", paste(names(boot_var5a[i,][boot_var5a[i,]==1]), collapse = "+"))
  boot_mod <- glm(formula, data = dat_id, family = binomial, x = T, y = T)
  boot_var5a[i, ncol(boot_var5a)] <- AIC(boot_mod)
  boot_est5a[i, names(coef(boot_mod))] <- coef(boot_mod)
  boot_se5a[i, names(coef(summary(boot_mod)))] <- coef(summary(boot_mod))[, "Std. Error"]
}

#----------------------------------------------------------------------------------------------------
# Save bootstrap results for later use

save(boot_varc, boot_estc, boot_sec,
     boot_var2c, boot_est2c, boot_se2c,
     boot_var3c, boot_est3c, boot_se3c,
     boot_var4c, boot_est4c, boot_se4c,
     boot_var5c, boot_est5c, boot_se5c,
     boot_vara, boot_esta, boot_sea,
     boot_var2a, boot_est2a, boot_se2a,
     boot_var3a, boot_est3a, boot_se3a,
     boot_var4a, boot_est4a, boot_se4a,
     boot_var5a, boot_est5a, boot_se5a, 
     file="bootstrap_results.RData")

#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
# SUMMARISE BOOTSTRAP RESULTS FOR CHILDREN

#----------------------------------------------------------------------------------------------------
# Best combination of 2 variables for children

pred <- c("VCAM", "SDC", "Ang", "IL8", "IP10", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat1, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(2,2))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat1, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est2c != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est2c) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est2c, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est2c, 2, median)
boot_025per <- apply(boot_est2c, 2, function(x) quantile(x, 0.025))
boot_975per <- apply(boot_est2c, 2, function(x) quantile(x, 0.975))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var2c))[1:length(pred)])
boot_modfreq <- data.frame(boot_var2c) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out

#----------------------------------------------------------------------------------------------------
# Best combination of 3 variables for children

pred <- c("VCAM", "SDC", "Ang", "IL8", "IP10", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat1, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(3,3))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat1, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est3c != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est3c) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est3c, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est3c, 2, median)
boot_025per <- apply(boot_est3c, 2, function(x) quantile(x, 0.025))
boot_975per <- apply(boot_est3c, 2, function(x) quantile(x, 0.975))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var3c))[1:length(pred)])
boot_modfreq <- data.frame(boot_var3c) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out

#----------------------------------------------------------------------------------------------------
# Best combination of 4 variables for children

pred <- c("VCAM", "SDC", "Ang", "IL8", "IP10", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat1, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(4,4))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat1, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est4c != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est4c) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est4c, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est4c, 2, median)
boot_025per <- apply(boot_est4c, 2, function(x) quantile(x, 0.025))
boot_975per <- apply(boot_est4c, 2, function(x) quantile(x, 0.975))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var4c))[1:length(pred)])
boot_modfreq <- data.frame(boot_var4c) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out

#----------------------------------------------------------------------------------------------------
# Best combination of 5 variables for children

pred <- c("VCAM", "SDC", "Ang", "IL8", "IP10", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + IP10 + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat1, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(5,5))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat1, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est5c != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est5c) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est5c, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est5c, 2, median)
boot_025per <- apply(boot_est5c, 2, function(x) quantile(x, 0.025, na.rm=T))
boot_975per <- apply(boot_est5c, 2, function(x) quantile(x, 0.975, na.rm=T))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var5c))[1:length(pred)])
boot_modfreq <- data.frame(boot_var5c) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out


#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
# SUMMARISE BOOTSTRAP RESULTS FOR ADULTS

#----------------------------------------------------------------------------------------------------
# Best combination of 2 variables for adults

pred <- c("VCAM", "SDC", "Ang", "IL8", "ns1(IP10)", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat2, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(2,2))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat2, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est2a != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est2a) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est2a, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est2a, 2, median)
boot_025per <- apply(boot_est2a, 2, function(x) quantile(x, 0.025))
boot_975per <- apply(boot_est2a, 2, function(x) quantile(x, 0.975))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var2a))[1:length(pred)])
boot_modfreq <- data.frame(boot_var2a) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out

#----------------------------------------------------------------------------------------------------
# Best combination of 3 variables for adults

pred <- c("VCAM", "SDC", "Ang", "IL8", "ns1(IP10)", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat2, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(3,3))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat2, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est3a != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est3a) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est3a, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est3a, 2, median)
boot_025per <- apply(boot_est3a, 2, function(x) quantile(x, 0.025))
boot_975per <- apply(boot_est3a, 2, function(x) quantile(x, 0.975))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var3a))[1:length(pred)])
boot_modfreq <- data.frame(boot_var3a) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out

#----------------------------------------------------------------------------------------------------
# Best combination of 4 variables for adults

pred <- c("VCAM", "SDC", "Ang", "IL8", "ns1(IP10)", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat2, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(4,4))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat2, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est4a != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est4a) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est4a, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est4a, 2, median)
boot_025per <- apply(boot_est4a, 2, function(x) quantile(x, 0.025, na.rm=T))
boot_975per <- apply(boot_est4a, 2, function(x) quantile(x, 0.975, na.rm=T))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var4a))[1:length(pred)])
boot_modfreq <- data.frame(boot_var4a) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out

#----------------------------------------------------------------------------------------------------
# Best combination of 5 variables for adults

pred <- c("VCAM", "SDC", "Ang", "IL8", "ns1(IP10)", "IL1RA", "CD163", "TREM", "Fer", "CRP")

## Estimate full model
full_mod <- glm(sev.or.inte ~ VCAM + SDC + Ang + IL8 + ns1(IP10) + IL1RA + CD163 + TREM + Fer + CRP, 
                family=binomial, data=dat2, x=T, y=T)
full_est <- coef(full_mod)
full_se <- coef(summary(full_mod))[, "Std. Error"]
pred_name <- names(full_est)[-1]

## Selected model
sel_m <- dredge(full_mod, rank="AIC", m.lim=c(5,5))
sel_var <- matrix(0, ncol = length(pred), nrow = 1, dimnames = list(NULL, pred))
sel_var_tmp <- attr(get.models(sel_m, 1)[[1]]$terms, "term.labels")

for (j in 1:ncol(sel_var)) {
  sel_var[1,j] <- ifelse(names(sel_var[1,j]) %in% sel_var_tmp, 1, 0)
}

formula <- paste("sev.or.inte~", paste(names(sel_var[1,][sel_var[1,]==1]), collapse = "+"))
sel_mod <- glm(formula, data = dat2, family = binomial, x = T, y = T)
sel_est <- coef(sel_mod[1])[c("(Intercept)", pred_name)]
sel_est[is.na(sel_est)] <- 0
names(sel_est) <- c("(Intercept)", pred_name)
sel_se <- coef(summary(sel_mod))[, "Std. Error"][c("(Intercept)", pred_name)]
sel_se[is.na(sel_se)] <- 0

## Bootstrap
bootnum <- 1000
load("bootstrap_results.RData") # Load bootstrap results
boot_01 <- (boot_est5a != 0) * 1
boot_inclusion <- apply(boot_01, 2, function(x) sum(x) / length(x) * 100)

## Overview of estimates and measures
sqe <- (t(boot_est5a) - full_est) ^ 2
rmsd <- apply(sqe, 1, function(x) sqrt(mean(x)))
rmsdratio <- rmsd / full_se
boot_mean <- apply(boot_est5a, 2, mean)
boot_meanratio <- boot_mean / full_est
boot_relbias <- (boot_meanratio / (boot_inclusion / 100) - 1) * 100
boot_median <- apply(boot_est5a, 2, median)
boot_025per <- apply(boot_est5a, 2, function(x) quantile(x, 0.025, na.rm=T))
boot_975per <- apply(boot_est5a, 2, function(x) quantile(x, 0.975, na.rm=T))

overview <- round(cbind(full_est, full_se, boot_inclusion, sel_est, sel_se,  
                        rmsdratio, boot_relbias, boot_median, boot_025per, 
                        boot_975per), 4)
overview <- overview[order(overview[,"boot_inclusion"], decreasing=T),]
overview

## Model frequency
group_cols <- c(colnames(data.frame(boot_var5a))[1:length(pred)])
boot_modfreq <- data.frame(boot_var5a) %>%
  mutate(time = 1) %>%
  group_by_(.dots = group_cols) %>%
  summarise(aic_median = round(median(aic),1),
            aic_025 = round(quantile(aic, 0.025),1),
            aic_975 = round(quantile(aic, 0.975),1),
            count = sum(time)) %>%
  ungroup() %>%
  mutate(percent = count/bootnum * 100) %>%
  arrange(desc(count)) %>%
  slice(1:20) %>%
  as.data.frame(.)

out <- boot_modfreq
for (i in 1:(ncol(out)-5)) {
  out[,i] <- ifelse(out[,i]==0, NA, "+")
}

out