Lambda_max.R 5.37 KB
Newer Older
Tom Reynkens's avatar
Tom Reynkens committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
###############################################
#
# Maximum value of lambda
#
###############################################


# Compute maximum value of lambda per covariate and return maximum over all covariates
#
# X: The design matrix including ones for the intercept
# y: Response vector
# weights: Vector of prior weights
# start: vector of starting values for the coefficients
# offset: Vector containing the offset for the model
# family: Family object
# pen.cov: List with penalty type per predictor
# n.par.cov: List with number of parameters to estimate per predictor
# pen.mat: List with (weighted) penalty matrix per predictor
19
# pen.mat.transform: List with inverse of (weighted) penalty matrix for ordinal predictors (Fused Lasso penalty)
Tom Reynkens's avatar
Tom Reynkens committed
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
.max.lambda <- function(X, y, weights, start, offset, family, pen.cov, n.par.cov, pen.mat, pen.mat.transform) {
  
  # Number of covariates
  n.cov <- length(n.par.cov)
  
  ###########
  # Initialisation
  
  # Starting value for mu (using starting value for beta)
  eta <- as.numeric(X %*% start + offset)
  mu <- family$linkinv(eta)
  
  ###########
  # Gradient update
  grad <- as.numeric((weights * (mu - y) / family$variance(mu) * family$mu.eta(eta)) %*% X / sum(weights != 0))
  # Note that this is the same as
  #grad <- as.numeric(Matrix::t(X) %*% as.vector(weights * (mu - y) / family$variance(mu) * family$mu.eta(eta)) / sum(weights != 0))
  
  # Compute gradient update
  beta.tilde <- start - grad
  
  # Split beta.tilde into groups defined by covariates
  beta.tilde.split <- split(beta.tilde, rep(1:n.cov, n.par.cov))
  
  lambda.max <- numeric(length(n.par.cov))
  
  for (j in 1:length(beta.tilde.split)) {
    
    if (pen.cov[[j]] == "lasso") {
      
      # Based on Lasso problem with y = beta, X = diag and penalty = lambda * sum(pen.weights * |beta|)
      lambda.max[j] <- max(abs(beta.tilde.split[[j]]) / diag(pen.mat[[j]]))
      
    } else if (pen.cov[[j]] == "grouplasso") {
      
      # Based on Group Lasso problem with y = beta, X = diag and penalty = lambda * pen.weights * ||beta||_2
      lambda.max[j] <- sqrt(sum((beta.tilde.split[[j]] / diag(pen.mat[[j]]))^2))
      
    } else if (pen.cov[[j]] == "flasso") {
      
      # Based on dual problem, Lasso with y = beta, X = pen.mat.transform and penalty = lambda * sum(|beta|)
      lambda.max[j] <- max(abs(t(pen.mat.transform[[j]]) %*% beta.tilde.split[[j]]))
      
    } else if (pen.cov[[j]] %in% c("gflasso", "2dflasso", "ggflasso")) {
      # Minimal lambda such that all elements of u are zero is equal to
      # maximum norm of minimal x (in sense of maximum norm) such that pen.mat^t*x=beta
      if (nrow(pen.mat[[j]]) > ncol(pen.mat[[j]])) {
        lambda.max[j] <- .min.maxnorm(A = t(pen.mat[[j]]), b = beta.tilde.split[[j]])
        
      } else {
70
        warning(paste0("'lambda.max' cannot be determined for predictor '", names(pen.cov)[j], "'."))
Tom Reynkens's avatar
Tom Reynkens committed
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
      }
    }
  }
  
  return(lambda.max)
}


# Compute maximum norm of minimal x (in sense of maximum norm) such that A*x = b
# where A is a matrix of full rank, and length(b) < length(x)
# Earle et al. (2017, J Optim Theory Appl)
.min.maxnorm <- function(A, b) {
  
  # Solution to A*x = b that minimises maximum norm of x
  x0 <- .PPF(A, b)
  
  # Compute generalised inverse of A
  A.ginv <- ginv(A)
  A.kernel <- round(diag(nrow = nrow(A.ginv)) - A.ginv %*% A, 10)
  
  # Find minimum of ||x0 + A.kernel * x||_infty
  # Note that all solutions of A*x = b are of the form x0 + A.kernel * x
  opt <- optim(par = 0*x0, fn = .maxnorm, method = "BFGS",
               control = list(maxit = 1e6, reltol = 1e-10), x0 = x0, A.kernel = A.kernel)
  
  return(opt$value)
}

# Maximum norm of x0 + A.kernel * z
.maxnorm <- function(z, x0, A.kernel) {
  return(max(abs(x0 + A.kernel %*% z)))
}


# Primal Path Following (PPF) method (for underdetermined systems), phase 1 
# Earle et al. (2017, J Optim Theory Appl)
.PPF <- function(A, b) {
  
  # Initialisation, m<n for underdetermined system
  m <- nrow(A)
  n <- ncol(A)
  if (n <= m) {
    stop("Matrix 'A' is not underdetermined.")
  }
  
  # Initial starting point
  xk <- ginv(A) %*% b
  dk <- 0 * xk
  ak <- 0
  
  # Initial starting index
  Active.Set <- which(abs(xk) == max(abs(xk)))
  
  # Perform step 1 of algorithm to obtain maximal active set
  while (length(Active.Set) < n-m+1) {
    
    # Number of non-active components
    Non.Active.Set <- (1:length(xk))[-Active.Set]
    n.ASc <- n - length(Active.Set)
    
    # Split into active and non-active components
    A.AS <- matrix(A[, Active.Set], ncol = length(Active.Set))
    A.ASc <- matrix(A[, -Active.Set], ncol = n.ASc)
    xk.AS <- xk[Active.Set]
    #xk.aSc <- xk[-Active.Set]
    
    if (rankMatrix(A.ASc) < m) {
      break
    }
    
    # Calculate descent direction
    dk.AS <- -sign(xk.AS)
    dk.ASc <- ginv(A.ASc) %*% A.AS %*% sign(xk.AS)
    
    dk[Active.Set] <- dk.AS
    dk[-Active.Set] <- dk.ASc
    
    # Calculate step size
    a.possible.set <- rep(-1, n)
    xi <- xk[Active.Set[1]]
    di <- dk[Active.Set[1]]
    
    for (j in Non.Active.Set) {
      a.one <- (xi - xk[j]) / (dk[j] - di)
      a.two <- (-xi - xk[j]) / (dk[j] - (-di))
      a.cand <- c(a.one, a.two)
      
      a.possible.set[j] <- ifelse(a.one < 0 & a.two < 0, -1, min(a.cand[which(a.cand > 0)]))
    }
    ak <- min(a.possible.set[which(a.possible.set > 0)])
    
    xk <- xk + ak * dk
    Active.Set <- which(abs(xk) == max(abs(xk)))
    
  }
  
  return(xk)
  
}