###############################################
#
# SMuRF algorithm
#
###############################################


# SMuRF algorithm; re-estimation and output are handled in the .glmsmurf.fit.internal function
#
# X: The design matrix including ones for the intercept
# y: Response vector
# weights: Vector of prior weights
# start: Vector of starting values for the coefficients
# offset: Vector containing the offset for the model
# family: Family object
# pen.cov: List with penalty type per predictor
# n.par.cov: List with number of parameters to estimate per predictor
# group.cov: List with the group of each predictor, which is only used for the Group Lasso penalty; 0 means no group
# lambda: Penalty parameter
# lambda1: List with lambda1 multiplied with penalty weights (for Lasso penalty) per predictor. The penalty parameter for the L_1-penalty in Sparse (Generalized) Fused Lasso or Sparse Graph-Guided Fused Lasso is lambda*lambda1
# lambda2: List with lambda2 multiplied with penalty weights (for Group Lasso penalty) per predictor. The penalty parameter for the L_2-penalty in Group (Generalized) Fused Lasso or Group Graph-Guided Fused Lasso is lambda*lambda2
# .scaled.ll: The scaled log-likelihood function
# pen.mat: List with (weighted) penalty matrix per predictor
# pen.mat.aux: List with eigenvector matrix and eigenvalue vector of (weighted) penalty matrix per predictor
# epsilon: Numeric tolerance value for stopping criterion
# maxiter: Maximum number of iterations of the SMuRF algorithm
# step: Initial step size
# tau: Parameter for backtracking the step size
# po.ncores: Number of cores used when computing the proximal operators
# print: A logical indicating if intermediate results should be printed
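#
# The algorithm minimises the penalised objective obj(beta) = f(beta) + g(beta),
# where f(beta) is the minus scaled log-likelihood and g(beta) is the total penalty,
# using proximal gradient steps with backtracking of the step size and FISTA-type
# acceleration that is restarted when the objective function increases.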
.glmsmurf.algorithm <- function(X, y, weights, start, offset, family, pen.cov, n.par.cov, group.cov,
                                lambda, lambda1, lambda2, .scaled.ll,
                                pen.mat, pen.mat.aux, epsilon, maxiter, step, tau, po.ncores, print) {
  
  
  # Inverse of the link function
  linkinv <- family$linkinv
  
  # Sample size
  n <- length(y)
  
  ###########
  # Initialisation
  
  # Initialise values of beta for use later on
  beta.old <- start
  beta.new <- start

  
  # Initialise counter for while-loop
  iter <- 0L
  
  # Initialise alpha's for acceleration
  alpha.old <- 0 # This value is overwritten before it is used
  alpha.new <- 1
  # Initialise theta for acceleration
  theta <- beta.new
  # Count the number of subsequent restarts (to avoid infinite loops)
  subs.restart <- 0L
  # Lower bound for step size
  step.low <- 1e-14
  
  # Initialise old objective function
  obj.beta.old <- 0
  # Starting value for mu (using starting value for beta)
  mu.cand <- linkinv(as.numeric(X %*% start + offset))
  
  # f function: minus scaled log-likelihood function
  f.beta.cand <- -.scaled.ll(y = y, n = n, mu = mu.cand, wt = weights)
  # Set f.beta.cand to infinity if numerical problems occur
  if (is.nan(f.beta.cand)) f.beta.cand <- Inf
  
  # g function: total penalty
  g.beta.cand <- .pen.tot(beta = start, pen.cov = pen.cov, n.par.cov = n.par.cov, group.cov = group.cov, 
                          pen.mat.cov = pen.mat, lambda = lambda, lambda1 = lambda1, lambda2 = lambda2)
  
  # Initialise new objective function
  obj.beta.new <- f.beta.cand + g.beta.cand
  
  ###########
  # While-loop
  
  # Update estimates for beta as long as the stopping criterion is not met (relative change
  # of the objective function larger than epsilon) and the maximum number of iterations is not reached
  while ((abs((obj.beta.old - obj.beta.new) / obj.beta.old) > epsilon | iter == 0) & iter < maxiter) {
    
    # Update old objective function
    obj.beta.old <- obj.beta.new
    # Update old estimates for beta
    beta.old <- beta.new
    
    ###########
    # Updates for values depending on theta
    
    # Compute eta and mu based on estimates for theta
    eta.theta <- as.numeric(X %*% theta + offset)
    mu.theta <- linkinv(eta.theta)
    
    # Compute f: minus scaled log-likelihood using estimates for theta
    f.theta <- -.scaled.ll(y = y, n = n, mu = mu.theta, wt = weights)
    # Set f.theta to infinity if numerical problems occur
    if (is.nan(f.theta)) f.theta <- Inf
    
    
    ###########
    # Gradient update
    
    # Compute gradient
    grad <- as.numeric((weights * (mu.theta - y) / family$variance(mu.theta) * family$mu.eta(eta.theta)) %*% X / sum(weights != 0))
    # Note that this is the same as
    # grad <- as.numeric(Matrix::t(X) %*% as.vector(weights * (mu.theta - y) / family$variance(mu.theta) * family$mu.eta(eta.theta)) / sum(weights != 0))
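    # This is the gradient of f (minus scaled log-likelihood) in theta: for a GLM, the j-th component
    # equals sum_i weights_i * (mu_i - y_i) / family$variance(mu_i) * family$mu.eta(eta_i) * X_ij,
    # divided by the number of non-zero weights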
    
    # Compute gradient update
    beta.tilde <- theta - step * grad
    
    
    ###########
    # Proximal operators
    
    # Compute proximal operators per covariate to update beta
    beta.cand <- .PO(beta.tilde = beta.tilde, beta.old = beta.old, pen.cov = pen.cov, n.par.cov = n.par.cov, group.cov = group.cov, pen.mat.cov = pen.mat, pen.mat.cov.aux = pen.mat.aux, 
                     lambda = lambda, lambda1 = lambda1, lambda2 = lambda2, step = step, po.ncores = po.ncores)
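    # The total penalty g is a sum of penalties over the predictors, so its proximal operator
    # prox_{step * g} splits into separate proximal operators per predictor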
    
    # Compute mu based on estimates for beta
    mu.cand <- linkinv(as.numeric(X %*% beta.cand + offset))
    
    # Compute f: minus scaled log-likelihood using estimates for beta
    f.beta.cand <- -.scaled.ll(y = y, n = n, mu = mu.cand, wt = weights)
    # Set f.beta.cand to infinity if numerical problems occur
    if (is.nan(f.beta.cand)) f.beta.cand <- Inf
    
    # Compute g: total penalty
    g.beta.cand <- .pen.tot(beta = beta.cand, pen.cov = pen.cov, n.par.cov = n.par.cov, group.cov = group.cov, pen.mat.cov = pen.mat, 
                            lambda = lambda, lambda1 = lambda1, lambda2 = lambda2)
    
    # Objective function
    obj.beta.cand <- f.beta.cand + g.beta.cand
    
    ###########
    # Backtracking step size
    
    # Auxiliary function
    h.theta <- f.theta + grad %*% (beta.cand - theta) + 1/(2*step) * sum((beta.cand - theta)^2) + g.beta.cand
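    # h.theta is the quadratic approximation of f around theta, evaluated in beta.cand, plus the
    # penalty g in beta.cand; the current step size is accepted once obj.beta.cand <= h.theta
    # (standard backtracking condition for proximal gradient methods)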
    
    # Reduce the step size until the objective function in beta.cand no longer exceeds the auxiliary function
    # or the step size becomes too small
    while (obj.beta.cand > h.theta & step >= step.low) {
      
      # Reduce step size
      step <- step * tau
      
      ###########
      # Gradient update
      
      beta.tilde <- theta - step * grad
      
      ###########
      # Proximal operators
      
      # Compute proximal operators per covariate to update beta
      beta.cand <- .PO(beta.tilde = beta.tilde, beta.old = beta.old, pen.cov = pen.cov, n.par.cov = n.par.cov, group.cov = group.cov, pen.mat.cov = pen.mat, pen.mat.cov.aux = pen.mat.aux, 
                       lambda = lambda, lambda1 = lambda1, lambda2 = lambda2, step = step, po.ncores = po.ncores)
      
      # Compute mu based on updated estimates for beta
      mu.cand <- linkinv(as.numeric(X %*% beta.cand + offset))
      
      # Compute f: minus scaled log-likelihood using updated estimates for beta
      f.beta.cand <- -.scaled.ll(y = y, n = n, mu = mu.cand, wt = weights)
      # Set f.beta.cand to infinity if numerical problems occur
      if (is.nan(f.beta.cand)) f.beta.cand <- Inf
      
      # Compute g: total penalty
      g.beta.cand <- .pen.tot(beta = beta.cand, pen.cov = pen.cov, n.par.cov = n.par.cov, group.cov = group.cov, pen.mat.cov = pen.mat, 
                              lambda = lambda, lambda1 = lambda1, lambda2 = lambda2)
      
      # Objective function
      obj.beta.cand <- f.beta.cand + g.beta.cand
      
      # Auxiliary function
      h.theta <- f.theta + grad %*% (beta.cand - theta) + 1/(2*step) * sum((beta.cand - theta)^2) + g.beta.cand
    }
    
    
    # New estimate for beta
    beta.new <- beta.cand
    # New value of the objective function
    obj.beta.new <- obj.beta.cand
    
    ###########
    # Acceleration updates
    
    # Update values for alpha
    alpha.old <- alpha.new 
    alpha.new <- (1 + sqrt(1 + 4 * alpha.old^2)) / 2
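    # alpha follows the standard FISTA sequence; the resulting momentum weight
    # (alpha.old - 1) / alpha.new in the update of theta below increases towards 1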
    
    # If objective function increases in this iteration:
    # keep old estimate for beta and reset values for alpha 
    if (obj.beta.new > obj.beta.old * (1 + epsilon)) {
      
      beta.new <- beta.old
      obj.beta.new <- obj.beta.old
      obj.beta.old <- 0
      
      # This value is overwritten before it is used since beta.new = beta.old (see the update of theta below)
      alpha.old <- 0
      # This value is used in the next iteration
      alpha.new <- 1
      
      # Add 1 to subsequent restart counter
      subs.restart <- subs.restart + 1L
      
    } else { 
      # Reset subsequent restart counter
      subs.restart <- 0L 
    }
    
    # Extra check to avoid getting stuck in a loop of subsequent restarts
    # (this can e.g. happen when the starting value is already very accurate)
    if (subs.restart > 1) {
      warning("Two subsequent restarts were performed; the while-loop is ended.")
      break
    }
    
    # Update value for theta
    # Note that theta = beta.new if the objective function increased in this iteration, since then beta.new = beta.old (see the if-statement above)
    theta <- beta.new + (alpha.old - 1) / alpha.new  * (beta.new - beta.old)
    
    # Increase counter
    iter <- iter + 1L
    
    # Print info every 100 iterations
    if (print & (iter %% 100 == 0)) {
      print(paste("   Iteration:", iter))
      print(paste("   Objective function:", obj.beta.new)) 
      print(paste("   Step size:", step)) 
    }
  }
  
  # Convergence indicator
  conv <- numeric(0)
  # Maximum number of iterations reached
  if (iter == maxiter) conv <- 1L
  # Two subsequent restarts
  if (subs.restart > 1) conv <- c(conv, 2L) 
  # Low step size
  if (step < step.low) {
    conv <- c(conv, 3L) 
    # Issue warning
    warning(paste0("The step size is below ", step.low, " and is no longer reduced. ",
                   "It might be better to start the algorithm with a larger step size."))
  }
  # Successful convergence
  if (length(conv) == 0) conv <- 0L
  
  # Return estimated coefficients, number of iterations performed, final step size and convergence indicator
  return(list(beta.new = beta.new, iter = iter, step = step, conv = conv))
}
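

# Minimal, self-contained sketch (not used by the package code above) that illustrates the same
# accelerated proximal gradient skeleton on a plain Lasso-penalised least-squares problem, where
# the proximal operator has the closed-form soft-thresholding solution. The function name
# .prox.grad.lasso.sketch and all of its arguments are hypothetical and serve only as an
# illustration; it omits the backtracking line search, so the fixed step size is assumed to be
# small enough.
.prox.grad.lasso.sketch <- function(X, y, lambda, step = 1e-2, maxiter = 1000L, epsilon = 1e-8) {
  
  n <- length(y)
  
  # f: least-squares loss scaled by the sample size, g: Lasso penalty
  f <- function(beta) 0.5 * sum((y - X %*% beta)^2) / n
  g <- function(beta) lambda * sum(abs(beta))
  # Proximal operator of s * g: soft-thresholding with threshold s * lambda
  prox <- function(z, s) sign(z) * pmax(abs(z) - s * lambda, 0)
  
  # Initialisation (cf. the initialisation of beta, theta and the alpha's above)
  beta.new <- beta.old <- rep(0, ncol(X))
  theta <- beta.new
  alpha.new <- 1
  obj.old <- Inf
  obj.new <- f(beta.new) + g(beta.new)
  
  for (iter in seq_len(maxiter)) {
    
    beta.old <- beta.new
    
    # Gradient of f in theta, gradient step and proximal operator
    grad <- as.numeric(crossprod(X, X %*% theta - y)) / n
    beta.new <- prox(theta - step * grad, step)
    
    # Objective function and stopping criterion on its relative change
    obj.new <- f(beta.new) + g(beta.new)
    if (is.finite(obj.old) && abs((obj.old - obj.new) / obj.old) <= epsilon) break
    obj.old <- obj.new
    
    # FISTA-type acceleration, as in the while-loop above
    alpha.old <- alpha.new
    alpha.new <- (1 + sqrt(1 + 4 * alpha.old^2)) / 2
    theta <- beta.new + (alpha.old - 1) / alpha.new * (beta.new - beta.old)
  }
  
  # Return estimated coefficients, number of iterations performed and final objective value
  return(list(beta = beta.new, iter = iter, obj = obj.new))
}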