simulation-functions.R 5.82 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Description =================================================================
# This file contains functions for simulating patients, assigning them to
# treatment groups, and setting their event probabilities based on treatment
# group and a covariate that could represent, for example, age or some
# meaningful biomarker.

# NOTE: The parameters of the PY functions are currently hard-coded.

# Required Packages ===========================================================
library(dplyr)
library(truncdist)

# Shared Functions ============================================================
# These functions are used regardless of the distribution of events

# Generate a skeleton dataset without events ----------------------------------

simulate_patients <- function(n = 5000, grpMin = 0, grpMax = 100,
                              pDays = 1500, treatP = 0.5) {
  grpVar <- runif(n, grpMin, grpMax)
  #PY <- rpois(n, pDays) / 365
  #PY <- rtrunc(n, "exp", 0, 2232, .000051)
  PY <- rtrunc(n, "gamma", 0, 2232, shape = 5.7, scale = 198)/365
  rTreat <- rbinom(n, 1, treatP)
  simData <- data.frame(cbind(grpVar, PY, rTreat))
  # Order
  simData <- simData[order(grpVar),]
  rownames(simData) <- as.character(1:n)
  simData
}

# Generate Factor Column with Cutpoints ---------------------------------------

generate_cutpoints <- function(x, quantiles = c(0, 1/3, 2/3, 1)) {
  out <- rep(NA, length(x))
  quantOrder <- sort(quantiles) # in case they are entered out of order
  out <- cut(x, quantile(x, probs = quantOrder), include.lowest = TRUE)
  for (i in 1:length(levels(out))) {
    levels(out)[i] = paste("cut", i, sep = "")
  }
  out
}

# Obtain event counts and PY from simulated patient data ----------------------

get_events_PY <- function(PY, rTreat, events, cuts) { 
  data <- data.frame(PY, rTreat, events, cuts)
  out <- group_by(data, rTreat, cuts) %>%
    summarize(PY = sum(PY), events = sum(events))
  e1 <- filter(out, rTreat == 0)$events
  e2 <- filter(out, rTreat == 1)$events
  PY1 <- filter(out, rTreat == 0)$PY
  PY2 <- filter(out, rTreat == 1)$PY

  data.frame(e1 = e1, py1 = PY1, e2 = e2, py2 = PY2)
}

# Poisson Simulation limited to one event per simulated subject ===============

poisEvents <- function(n, probs, rTreat) {
  events_tmp <- rpois(n, probs)
  while(max(events_tmp) > 1){
    ind_0 = which(events_tmp[rTreat == 0] > 1) # Indices within subset
    ind_1 = which(events_tmp[rTreat == 1] > 1) # Indices within subset
    events_tmp[rTreat==0][ind_0] <- events_tmp[rTreat==0][ind_0] - 1
    events_tmp[rTreat==1][ind_1] <- events_tmp[rTreat==1][ind_1] - 1
    for(i in ind_0) {
          dir = sample(c(1,-1), 1)
          events_tmp[rTreat==0][i + dir] <- events_tmp[rTreat==0][i + dir] + 1
    } 

    for(i in ind_1) {
      dir = sample(c(1, -1), 1)
      events_tmp[rTreat==1][i + dir] <- events_tmp[rTreat==1][i+dir] + 1
    }
  }
  events_tmp
}


# Different Event Distributions =============================================== 

# Generate events with probabilities linear in the covariate ------------------

generate_linear_events <- function(min_1 = .005, max_1 = .03,
                                   min_0 = .01, max_0 = .035,
                                   upper = 0, grpVar, PY, rTreat) {
  probs <- numeric(length(grpVar))
  probs[rTreat == 0] <- seq(from = min_0, to = max_0,
                          length.out = sum(rTreat == 0))
  probs[rTreat == 1] <- seq(from = min_1, to = max_1,
                            length.out = sum(rTreat == 1))
  topthird <- which(rTreat == 0 & grpVar > quantile(grpVar, 1/3))
  probs[topthird] <- probs[topthird] + upper

  probs <- probs * PY

  poisEvents(n = length(grpVar), probs = probs, rTreat = rTreat)
  # rbinom(length(grpVar), 1, probs)
  
}

# Wrapper Function

simulate_patients_linear <- function(n = 5000, grpMin = 0, grpMax = 100,
                                     pDays = 1095, treatP = 0.5, min_1 = .005,
                                     max_1 = .03, min_0 = .01, max_0 = .035,
                                     upper = 0) {
  patients <- simulate_patients(n, grpMin, grpMax, pDays, treatP)
  events <- generate_linear_events(min_1, max_1, min_0, max_0,
                                   grpVar = patients$grpVar, PY = patients$PY,
                                   rTreat = patients$rTreat, upper = upper)
  cbind(patients, events)
}

# Generate events with stepwise probabilities ---------------------------------

generate_stepwise_events <- function(probs_1 = c(.005, .0175, .03),
                                     probs_0 = c(.01, .0225, .035),
                                     grpVar, PY, rTreat) {
  n_groups <- length(probs_0)
  grp0_length <- sum(rTreat == 0)
  grp1_length <- sum(rTreat == 1)
  grp0_p <- numeric()
  grp1_p <- numeric()
  
  for (i in 1:n_groups) {
    grp0_p=c(grp0_p, rep(probs_0[i], ceiling((i/n_groups) * grp0_length) -
                         ceiling(((i - 1) / n_groups) * grp0_length)))
    grp1_p=c(grp1_p, rep(probs_1[i], ceiling((i/n_groups) * grp1_length) -
                         ceiling(((i - 1) / n_groups) * grp1_length)))
  }
  
  probs <- numeric(length(grpVar))
  probs[rTreat == 0] <- grp0_p
  probs[rTreat == 1] <- grp1_p
  probs=probs * PY
  #rbinom(length(grpVar), 1, probs)
  poisEvents(n = length(grpVar), probs = probs, rTreat = rTreat)

}

# Wrapper Function

simulate_patients_stepwise <- function(n = 5000, grpMin = 0,grpMax = 100,
                                       pDays = 1095, treatP = 0.5,
                                       probs_1 = c(.005, .01, .015),
                                       probs_0 = c(.01, .015, .02)) {
  patients <- simulate_patients(n, grpMin, grpMax, pDays, treatP)
  events <- generate_stepwise_events(probs_1, probs_0,
                                     grpVar = patients$grpVar,
                                     PY = patients$PY,
                                     rTreat = patients$rTreat)
  cbind(patients, events)
}