## Simulation Code for Example 2 in McGowan, Nix, Murphy, Bierman, & CPPRG
## Written by Herle McGowan
## We set the standardized values of the parameters and then iterate to find
## the corresponding unstandardized values
## The unstandardized values are then used to generate variables that, when
## standardized, would return the standardized parameters we set
## Parameters are standardized according to the following relations
## (see endnote 6 in the paper cited above):
# For parameters of a continuous predictor of a continuous response
#   (e.g. delta, phi, beta):
#   standardized parameter = unstandardized parameter * (sd(predictor) / sd(response))
# For parameters of a continuous predictor of a binary response (e.g. gamma):
#   standardized parameter = unstandardized parameter * sd(predictor)
# Parameters of a binary predictor of a binary response (e.g. theta)
#   are not standardized

#################################################
## Initial set-up and assignments of variables ##
#################################################

# Inverse logit (expit) function, after ilogit() written by Julian Faraway
# (available in library(faraway)).
#
# Argument:
#   x  numeric vector (or array) of values on the logit scale
# Value:
#   exp(x) / (1 + exp(x)), element by element, with the attributes of x kept.
#   Missing inputs give NA.  Where exp(x) overflows to Inf the limiting value
#   1 is returned; the plain formula would give Inf / Inf = NaN there.
ilogit <- function(x) {
  ex <- exp(x)
  p <- ex / (1 + ex)
  # Only entries whose exp() overflowed are patched, so every other entry is
  # exactly the value the plain formula produces
  p[is.infinite(ex)] <- 1
  # Report any missing input (NA or NaN) as NA
  p[is.na(x)] <- NA
  p
}

# Set seed so that results can be reproduced
set.seed(1023)

# Set number of time points to be examined
n.time <- 22

# Set sample size to be used for finding unstandardized parameters
n <- 10000

# Initialize matrices to store results of variable generation
# (one row per subject, n.time + 1 columns)
c.temp <- matrix(0, nrow = n, ncol = (n.time + 1))
p.temp <- matrix(0, nrow = n, ncol = (n.time + 1))
d.temp <- matrix(0, nrow = n, ncol = (n.time + 1))

# Define vectors to store results of unstandardized parameters
gamma <- rep(0, n.time) # parameters from confounder to dose
delta <- rep(0, n.time) # parameters from confounder to confounder

# Define vectors to store set values of standardized parameters for each simulation
## Note that only the code for Simulation A when the size
## of the confounding is -0.02 will run with the current parameter values
## To obtain results for other simulations, change the parameter values
## according to Table 1 in the paper
sigma <- 1                               # Standard deviation of random error for each variable
s.gamma <- rep(c(0, 0.14), c(1, n.time)) # Standardized parameters from confounder to dose
s.delta <- rep(c(0, 1), c(1, n.time))    # Standardized parameters from confounder to confounder
s.theta <- 1                             # Standardized parameters from dose to dose
s.phi <- -0.14                           # Standardized parameters from confounder to y
s.beta <- 0                              # Standardized parameters from dose to y

##########################################################
## Iterate to calculate unstandardized parameter values ##
## According to formula given in header                 ##
##########################################################

# Squared-error tolerance used by every convergence check below
conv.tol <- 1e-4

# Upper limit on refinement passes per parameter.  Each pass draws fresh
# random errors, so a target that cannot be reached would otherwise loop
# forever; stop with an error instead.
max.iter <- 10000

# Generate initial pretreatment confounder
c.temp[, 1] <- rnorm(n, 0, sigma)

# Calculate unstandardized parameter from confounder to dose
gamma[1] <- s.gamma[1] / sd(c.temp[, 1])

# Generate initial first dose
p.temp[, 2] <- ilogit(gamma[1] * c.temp[, 1])
d.temp[, 2] <- rbinom(n, 1, p.temp[, 2])

# Calculate initial unstandardized parameter from confounder to confounder
delta[1] <- s.delta[1] * sigma / sd(c.temp[, 1])

# Iterate to refine unstandardized parameter delta
n.iter <- 0
repeat {
  n.iter <- n.iter + 1
  if (n.iter > max.iter) {
    stop("delta[1] did not converge within ", max.iter, " iterations",
         call. = FALSE)
  }

  # Generate initial confounder at time 1
  c.temp[, 2] <- delta[1] * c.temp[, 1] + rnorm(n, 0, sigma)

  # See what standardized parameters would be based on this data
  delta01.std <- delta[1] * sd(c.temp[, 1]) / sd(c.temp[, 2])

  # Check if these are what we want standardized parameters to be
  if ((delta01.std - s.delta[1])^2 < conv.tol) {
    break
  }

  # If not, update unstandardized parameters and continue
  delta[1] <- s.delta[1] * sd(c.temp[, 2]) / sd(c.temp[, 1])
} # End repeat

# Set unstandardized parameter from dose to dose
# (binary predictor of a binary response, so it is not rescaled)
theta <- s.theta

# Loop to generate initial variables for time 2 to end
# (columns 3, ..., n.time + 1; the sequence is empty when n.time is 1)
for (i in seq_len(n.time - 1) + 2L) {
  # Calculate unstandardized parameter from confounder to dose
  gamma[(i - 1)] <- s.gamma[(i - 1)] / sd(c.temp[, (i - 1)])

  # Generate initial dose at time i
  p.temp[, i] <- ilogit(gamma[(i - 1)] * c.temp[, (i - 1)] +
                          theta * d.temp[, (i - 1)])
  d.temp[, i] <- rbinom(n, 1, p.temp[, i])

  # Calculate initial unstandardized parameters from confounder to confounder
  delta[(i - 1)] <- s.delta[(i - 1)] * sigma / sd(c.temp[, (i - 1)])

  # Iterate to refine initial unstandardized delta
  n.iter <- 0
  repeat {
    n.iter <- n.iter + 1
    if (n.iter > max.iter) {
      stop("delta[", i - 1, "] did not converge within ", max.iter,
           " iterations", call. = FALSE)
    }

    # Generate confounder at time i
    c.temp[, i] <- delta[(i - 1)] * c.temp[, (i - 1)] + rnorm(n, 0, sigma)

    # See what standardized parameter would be based on this data
    delta.std <- delta[(i - 1)] * sd(c.temp[, (i - 1)]) / sd(c.temp[, i])

    # Check if this is what we want standardized parameter to be
    if ((delta.std - s.delta[(i - 1)])^2 < conv.tol) {
      break
    }

    # If not, update unstandardized parameter and continue
    delta[(i - 1)] <- s.delta[(i - 1)] * sd(c.temp[, i]) / sd(c.temp[, (i - 1)])
  } # End repeat
} # End for loop

# Calculate initial cumulative dose across all time points
# (column 1 of d.temp holds no dose and is left out)
cum.d <- rowSums(d.temp[, -1, drop = FALSE])

# Calculate initial unstandardized parameter confounder to y
phi <- s.phi * sigma / sd(c.temp[, n.time])

# Calculate initial unstandardized parameter dose to y
beta <- s.beta * sigma / sd(cum.d)

# Iterate to refine initial unstandardized parameters phi and beta
n.iter <- 0
repeat {
  n.iter <- n.iter + 1
  if (n.iter > max.iter) {
    stop("phi and beta did not converge within ", max.iter, " iterations",
         call. = FALSE)
  }

  # Generate y
  y <- phi * c.temp[, n.time] + beta * cum.d + rnorm(n, 0, sigma)

  # See what standardized parameters would be based on this data
  phi.std <- phi * sd(c.temp[, n.time]) / sd(y)
  beta.std <- beta * sd(cum.d) / sd(y)

  # Check if these are what we want standardized parameters to be
  if (((phi.std - s.phi)^2 + (beta.std - s.beta)^2) < conv.tol) {
    break
  }

  # If not, update unstandardized parameters and continue
  phi <- s.phi * sd(y) / sd(c.temp[, n.time])
  beta <- s.beta * sd(y) / sd(cum.d)
} # End repeat

## End calculate unstandardized parameter values

##################################################
## Generate data and Estimate regression models ##
##################################################

# Set number of datasets to be created
nn <- 1000

# Set sample size for each dataset
n <- 400

# Initialize
# matrices to store data (one row per subject, n.time + 1 columns)
conf <- matrix(0, nrow = n, ncol = (n.time + 1))
prob <- matrix(0, nrow = n, ncol = (n.time + 1))
dose <- matrix(0, nrow = n, ncol = (n.time + 1))

# Define list to save regression results (one preallocated slot per dataset)
reg <- vector("list", nn)

## Loop to create multiple datasets and estimate regression models
for (j in seq_len(nn)) {

  #######################
  ## Generate Datasets ##
  #######################

  # Generate pretreatment confounder
  conf[, 1] <- rnorm(n, 0, sigma)

  # Generate first dose
  prob[, 2] <- ilogit(gamma[1] * conf[, 1])
  dose[, 2] <- rbinom(n, 1, prob[, 2])

  # Generate confounder at time 1
  conf[, 2] <- delta[1] * conf[, 1] + rnorm(n, 0, sigma)

  # Loop to generate dose and confounder at remaining time points
  # (columns 3, ..., n.time + 1; the sequence is empty when n.time is 1)
  for (i in seq_len(n.time - 1) + 2L) {
    # Generate dose at time i
    prob[, i] <- ilogit(gamma[(i - 1)] * conf[, (i - 1)] +
                          theta * dose[, (i - 1)])
    dose[, i] <- rbinom(n, 1, prob[, i])

    # Generate confounder at time i
    conf[, i] <- delta[(i - 1)] * conf[, (i - 1)] + rnorm(n, 0, sigma)
  } # End for loop

  # Calculate cumulative dose across all time points
  # (column 1 of dose holds no dose and is left out)
  cum.d <- rowSums(dose[, -1, drop = FALSE])

  # Generate y
  y <- phi * conf[, n.time] + beta * cum.d + rnorm(n, 0, sigma)

  ## End generate data

  ########################################
  ## Estimate regression models         ##
  ## Ignoring time-varying confounders  ##
  ########################################

  # Estimate linear regression of y on cum.d, or regression of y on the
  # pretreatment confounder and cum.d, where appropriate.
  # x = TRUE keeps the design matrix on the fit for the standard errors below.
  if (s.gamma[1] == 0) {
    fit <- lm(y ~ cum.d, x = TRUE)
  } else {
    fit <- lm(y ~ conf[, 1] + cum.d, x = TRUE)
  }

  # Save estimated betas and calculate se(beta), t-statistics
  beta.hat <- coef(fit)
  # se = sqrt(diag((X'X)^-1 * residual variance))
  se <- sqrt(diag(solve(t(fit$x) %*% fit$x) * summary(fit)$sigma^2))
  # The same sd(cum.d) / sd(y) factor is applied to every coefficient, so only
  # the cum.d entry is a standardized beta in the sense of the file header
  s.beta.hat <- beta.hat * sd(cum.d) / sd(y)
  # Named t.stat, not t, so base::t() used above is not shadowed
  t.stat <- beta.hat / se

  # Save each estimated model (element names and order are relied on when the
  # results are flattened into a data frame)
  reg[[j]] <- list(beta.hat = beta.hat, se = se, t = t.stat,
                   s.beta.hat = s.beta.hat)

  ## End estimate models ignoring time-varying confounders

} ## End loop to create multiple datasets and estimate regression models

#############################################
## Access estimates from regression models ##
#############################################

# Flatten the saved results into a data frame for easier access.
# Each row is one dataset; the columns run through beta.hat, se, t and
# s.beta.hat in that order, each repeated once per model coefficient.
all <- data.frame(t(mapply(as.matrix, lapply(reg, as.data.frame))))

# Locate the cum.d columns.  Without a pretreatment confounder effect
# (Simulations A and B) the model has two coefficients; with one
# (Simulation C) it has three, which shifts the column positions.
no.pretreatment.effect <- s.gamma[1] == 0
if (no.pretreatment.effect) {
  t.col <- 6
  s.beta.col <- 8
} else {
  t.col <- 9
  s.beta.col <- 12
}

# Average standardized estimate, reported for every simulation
print("Average estimate of standardized beta")
print("Calculated as the mean of standardized betas across 1000 data sets")
print(round(mean(all[, s.beta.col]), 3))
print("-----")

if (s.beta == 0) {
  # True beta is zero: report the two-sided rejection rate
  if (no.pretreatment.effect) {
    print("Type 1 Error Rate for Simulation A (true beta = 0)")
  } else {
    print("Type 1 Error Rate for Simulation C (true beta = 0)")
  }
  print("Calculated as the proportion of |t| > 1.96 across 1000 data sets")
  print(mean(abs(all[, t.col]) > 1.96))
} else if (no.pretreatment.effect) {
  # True beta is non-zero: one-sided power, reported for Simulation B only
  print("Power for Simulation B (true beta > 0)")
  print("Calculated as the proportion of t > 1.645 across 1000 data sets")
  print(mean(all[, t.col] > 1.645))
}

## End access estimates