## Simulation Code for Example 2 in McGowan, Nix, Murphy, Bierman, & CPPRG
## Written by Herle McGowan

## We set the standardized values of the parameters and then iterate to find the corresponding unstandardized values
## The unstandardized values are then used to generate variables that, when standardized, would return the standardized parameters we set
## Parameters are standardized according to the following relations (see endnote 6 in the paper cited above):
   # For parameters of a continuous predictor of a continuous response (e.g. delta, phi, beta): standardized parameter = unstandardized parameter * (sd(predictor) / sd(response))
   # For parameters of a continuous predictor of a binary response (e.g. gamma): standardized parameter = unstandardized parameter * sd(predictor)
   # Parameters of a binary predictor of a binary response (e.g. theta) are not standardized


#################################################
## Initial set-up and assignments of variables ##
#################################################

# Inverse logit (expit) function, after ilogit() by Julian Faraway
# Available in library(faraway)
#
# Maps each element of x to 1 / (1 + exp(-x)).
#   x       numeric values on the log-odds scale; may contain missing values
#   returns probabilities in [0, 1], one per element of x, with NA wherever
#           x is missing
#
# stats::plogis() is used rather than exp(x) / (1 + exp(x)): once exp(x)
# overflows (x above roughly 709) that ratio is Inf / Inf = NaN, whereas the
# correct limit is 1.
ilogit <- function (x) {
    p <- stats::plogis(x)
    # Report every missing input (NA or NaN) as NA, as the original did
    p[is.na(x)] <- NA
    p
}

# Fix the random number generator so that every run gives the same results
set.seed(1023)

# Number of time points to be examined
n.time <- 22

# Sample size used while searching for the unstandardized parameters
n <- 10000

# Zero-filled n x (n.time + 1) matrices that receive the generated variables:
# c.temp holds confounders, p.temp dose probabilities, d.temp binary doses
c.temp <- p.temp <- d.temp <- matrix(0, nrow = n, ncol = n.time + 1)

# Storage for the unstandardized parameters, one entry per time point
gamma <- numeric(n.time)            # parameters from confounder to dose
delta <- numeric(n.time)            # parameters from confounder to confounder

# Standardized parameter values that define the simulation
## Note that only the code for Simulation A when the size of the confounding is -0.02 will run with the current parameter values
## To obtain results for other simulations, change the parameter values according to Table 1 in the paper
sigma   <- 1                        # Standard deviation of random error for each variable
s.gamma <- c(0, rep(0.14, n.time))  # Standardized parameters from confounder to dose (first entry: pretreatment confounder)
s.delta <- c(0, rep(1, n.time))     # Standardized parameters from confounder to confounder (first entry: pretreatment confounder)
s.theta <- 1                        # Parameter from dose to dose (used unstandardized)
s.phi   <- -0.14                    # Standardized parameter from confounder to y
s.beta  <- 0                        # Standardized parameter from dose to y


###########################################################
## Iterate to calculate unstandardized parameter values  ##
## According to formula given in header                  ##
###########################################################

# Each refinement below regenerates a variable with the current unstandardized
# parameter, checks which standardized value that implies, and rescales the
# parameter until the squared distance to the target is below 1e-4.
# Some targets can never be reached (e.g. a standardized delta above 1 in
# absolute value), so the number of passes is capped and an error is raised
# instead of looping forever.
max.iter <- 10000

# Generate initial pretreatment confounder
c.temp[, 1] <- rnorm(n, 0, sigma)

# Calculate unstandardized parameter from pretreatment confounder to first dose
gamma[1] <- s.gamma[1] / sd(c.temp[, 1])

# Generate initial first dose
p.temp[, 2] <- ilogit(gamma[1] * c.temp[, 1])
d.temp[, 2] <- rbinom(n, 1, p.temp[, 2])

# Starting value for the unstandardized parameter from confounder to confounder
delta[1] <- s.delta[1] * sigma / sd(c.temp[, 1])

# Iterate to refine unstandardized parameter delta[1]
converged <- FALSE
for (iter in seq_len(max.iter)) {

    # Generate initial confounder at time 1
    c.temp[, 2] <- delta[1] * c.temp[, 1] + rnorm(n, 0, sigma)

    # See what the standardized parameter would be based on this data
    delta01.std <- delta[1] * sd(c.temp[, 1]) / sd(c.temp[, 2])

    # Stop as soon as it matches the standardized value we want
    if ((delta01.std - s.delta[1])^2 < 1e-4) {
        converged <- TRUE
        break
    }

    # If not, update the unstandardized parameter and continue
    delta[1] <- s.delta[1] * sd(c.temp[, 2]) / sd(c.temp[, 1])

} # End refinement of delta[1]
if (!converged) {
    stop("delta[1] did not reach its standardized target s.delta[1] within ",
         max.iter, " iterations", call. = FALSE)
}

# Unstandardized parameter from dose to dose (binary predictor of a binary
# response, so it is not rescaled)
theta <- s.theta

# Loop to generate initial variables for time 2 to end
# (columns 3 to n.time + 1; the sequence is empty when n.time is 1)
for (i in seq_len(n.time - 1) + 2L) {

    # Calculate unstandardized parameter from confounder to dose
    gamma[i - 1] <- s.gamma[i - 1] / sd(c.temp[, i - 1])

    # Generate initial dose at time i
    p.temp[, i] <- ilogit(gamma[i - 1] * c.temp[, i - 1] + theta * d.temp[, i - 1])
    d.temp[, i] <- rbinom(n, 1, p.temp[, i])

    # Starting value for the unstandardized parameter from confounder to confounder
    delta[i - 1] <- s.delta[i - 1] * sigma / sd(c.temp[, i - 1])

    # Iterate to refine unstandardized delta[i - 1]
    converged <- FALSE
    for (iter in seq_len(max.iter)) {

        # Generate confounder at time i
        c.temp[, i] <- delta[i - 1] * c.temp[, i - 1] + rnorm(n, 0, sigma)

        # See what the standardized parameter would be based on this data
        delta.std <- delta[i - 1] * sd(c.temp[, i - 1]) / sd(c.temp[, i])

        # Stop as soon as it matches the standardized value we want
        if ((delta.std - s.delta[i - 1])^2 < 1e-4) {
            converged <- TRUE
            break
        }

        # If not, update the unstandardized parameter and continue
        delta[i - 1] <- s.delta[i - 1] * sd(c.temp[, i]) / sd(c.temp[, i - 1])

    } # End refinement of delta[i - 1]
    if (!converged) {
        stop("delta[", i - 1, "] did not reach its standardized target within ",
             max.iter, " iterations", call. = FALSE)
    }

} # End for loop

# Calculate initial cumulative dose across all time points
# (first column of d.temp is never filled, so it is dropped)
cum.d <- rowSums(d.temp[, -1, drop = FALSE])

# Starting value for the unstandardized parameter from confounder to y
phi <- s.phi * sigma / sd(c.temp[, n.time])

# Starting value for the unstandardized parameter from dose to y
beta <- s.beta * sigma / sd(cum.d)

# Iterate to refine unstandardized parameters phi and beta jointly
converged <- FALSE
for (iter in seq_len(max.iter)) {

    # Generate y
    y <- phi * c.temp[, n.time] + beta * cum.d + rnorm(n, 0, sigma)

    # See what standardized parameters would be based on this data
    phi.std <- phi * sd(c.temp[, n.time]) / sd(y)
    beta.std <- beta * sd(cum.d) / sd(y)

    # Stop as soon as both match the standardized values we want
    if (((phi.std - s.phi)^2 + (beta.std - s.beta)^2) < 1e-4) {
        converged <- TRUE
        break
    }

    # If not, update unstandardized parameters and continue
    phi <- s.phi * sd(y) / sd(c.temp[, n.time])
    beta <- s.beta * sd(y) / sd(cum.d)

} # End refinement of phi and beta
if (!converged) {
    stop("phi and beta did not reach their standardized targets within ",
         max.iter, " iterations", call. = FALSE)
}

## End calculate unstandardized parameter values


##################################################
## Generate data and Estimate regression models ##
##################################################

# Number of simulated datasets
nn <- 1000

# Observations per simulated dataset
# (overwrites the larger sample size used to find the unstandardized parameters)
n <- 400

# Zero-filled n x (n.time + 1) matrices reused for every dataset:
# conf holds confounders, prob dose probabilities, dose binary doses
conf <- prob <- dose <- matrix(0, nrow = n, ncol = n.time + 1)

# List that collects the regression results, one element per dataset
reg <- list()

## Loop to create multiple datasets and estimate regression models
## Each pass overwrites conf, prob and dose, then stores one entry in reg
for (j in seq_len(nn)) {

    #######################
    ## Generate Datasets ##
    #######################

    # Generate pretreatment confounder
    conf[, 1] <- rnorm(n, 0, sigma)

    # Generate first dose
    prob[, 2] <- ilogit(gamma[1] * conf[, 1])
    dose[, 2] <- rbinom(n, 1, prob[, 2])

    # Generate confounder at time 1
    conf[, 2] <- delta[1] * conf[, 1] + rnorm(n, 0, sigma)

    # Loop to generate dose and confounder at remaining time points
    # (columns 3 to n.time + 1; the sequence is empty when n.time is 1)
    for (i in seq_len(n.time - 1) + 2L) {

        # Generate dose at time i from the previous confounder and previous dose
        prob[, i] <- ilogit(gamma[i - 1] * conf[, i - 1] + theta * dose[, i - 1])
        dose[, i] <- rbinom(n, 1, prob[, i])

        # Generate confounder at time i
        conf[, i] <- delta[i - 1] * conf[, i - 1] + rnorm(n, 0, sigma)

    } # End for loop

    # Calculate cumulative dose across all time points
    # (first column of dose is never filled, so it is dropped)
    cum.d <- rowSums(dose[, -1, drop = FALSE])

    # Generate y
    y <- phi * conf[, n.time] + beta * cum.d + rnorm(n, 0, sigma)

    ## End generate data


    ########################################
    ## Estimate regression models         ##
    ## Ignoring time-varying confounders  ##
    ########################################

    # Regress y on cum.d alone when the pretreatment confounder has no effect
    # on dose; otherwise also adjust for the pretreatment confounder.
    # A plain if/else is used because the condition is a single value.
    if (s.gamma[1] == 0) {
        fit <- lm(y ~ cum.d)
    } else {
        fit <- lm(y ~ conf[, 1] + cum.d)
    }

    # Save estimated coefficients, their standard errors and t-statistics
    beta.hat <- coef(fit)
    se <- sqrt(diag(vcov(fit)))
    # The same cum.d scaling is applied to every coefficient; only the cum.d
    # entry is a standardized beta
    s.beta.hat <- beta.hat * sd(cum.d) / sd(y)
    # Named t.stat so that base::t is not masked; stored under the name "t"
    t.stat <- beta.hat / se

    # Save each estimated model
    reg[[j]] <- list(beta.hat = beta.hat, se = se, t = t.stat, s.beta.hat = s.beta.hat)

    ## End estimate models ignoring time-varying confounders

} ## End loop to create multiple datasets and estimate regression models


#############################################
## Access estimates from regression models ##
#############################################

# Flatten the saved results into a data frame with one row per dataset.
# Each element of reg becomes a coefficient-by-statistic matrix that is
# unrolled column by column, so the columns run through beta.hat, se, t and
# s.beta.hat in turn, each with one value per model coefficient.
all <- data.frame(t(mapply(as.matrix, lapply(reg, as.data.frame))))

# Without an effect of the pretreatment confounder (Simulations A and B) the
# model has two coefficients; with one (Simulation C) it has three. Pick the
# columns holding the cum.d entries accordingly.
no.pretreatment <- s.gamma[1] == 0
if (no.pretreatment) {
    std.col <- 8    # standardized beta for cum.d
    t.col <- 6      # t-statistic for cum.d
} else {
    std.col <- 12   # standardized beta for cum.d
    t.col <- 9      # t-statistic for cum.d
}

# Average standardized estimate, reported for every simulation
print("Average estimate of standardized beta")
print("Calculated as the mean of standardized betas across 1000 data sets")
print(round(mean(all[, std.col]), 3))
print("-----")

if (s.beta == 0) {

    # Two-sided rejection rate when the true effect is zero
    if (no.pretreatment) {
        print("Type 1 Error Rate for Simulation A (true beta = 0)")
    } else {
        print("Type 1 Error Rate for Simulation C (true beta = 0)")
    }
    print("Calculated as the proportion of |t| > 1.96 across 1000 data sets")
    print(mean(abs(all[, t.col]) > 1.96))

} else if (no.pretreatment) {

    # One-sided rejection rate when the true effect is nonzero
    # (only reported when there is no pretreatment confounder effect)
    print("Power for Simulation B (true beta > 0)")
    print("Calculated as the proportion of t > 1.645 across 1000 data sets")
    print(mean(all[, t.col] > 1.645))

} # End if statement

## End access estimates