Commit 8b06fd74 authored by Tom Reynkens's avatar Tom Reynkens

Always use "predictor" instead of "variable"

parent b70c7154
......@@ -16,7 +16,7 @@
# pen.cov: List with penalty type per predictor
# n.par.cov: List with number of parameters to estimate per predictor
# pen.mat: List with (weighted) penalty matrix per predictor
# pen.mat.transform: List with inverse of (weighted) penalty matrix for ordinal variables (Fused Lasso penalty)
# pen.mat.transform: List with inverse of (weighted) penalty matrix for ordinal predictors (Fused Lasso penalty)
.max.lambda <- function(X, y, weights, start, offset, family, pen.cov, n.par.cov, pen.mat, pen.mat.transform) {
# Number of covariates
......@@ -67,7 +67,7 @@
lambda.max[j] <- .min.maxnorm(A = t(pen.mat[[j]]), b = beta.tilde.split[[j]])
} else {
warning(paste0("'lambda.max' cannot be determined for variable '", names(pen.cov)[j], "'."))
warning(paste0("'lambda.max' cannot be determined for predictor '", names(pen.cov)[j], "'."))
}
}
}
......
......@@ -24,7 +24,7 @@
# lambda2: List with lambda2 multiplied with penalty weights (for Group Lasso penalty) per predictor. The penalty parameter for the L_2-penalty in Group (Generalized) Fused Lasso or Group Graph-Guided Fused Lasso is lambda*lambda2
# pen.mat: List with (weighted) penalty matrix per predictor
# pen.mat.aux: List with eigenvector matrix and eigenvalue vector of (weighted) penalty matrix per predictor
# pen.mat.transform: List with inverse of (weighted) penalty matrix for ordinal variables (Fused Lasso penalty)
# pen.mat.transform: List with inverse of (weighted) penalty matrix for ordinal predictors (Fused Lasso penalty)
# valdata: Validation data when selecting lambda out-of-sample
# control: List of parameters used in the fitting process
.select.lambda <- function(X, y, weights, start, offset, family, pen.cov, n.par.cov, group.cov, refcat.cov,
......@@ -91,7 +91,7 @@
cv.fold.allocation[tmp[, 1L]] <- tmp[, 2L]
}
# Indices of variables with Lasso or Group Lasso penalty
# Indices of predictors with Lasso or Group Lasso penalty
ind.stand <- which(pen.cov %in% c("lasso", "grouplasso"))
##################################
......@@ -207,7 +207,7 @@
" Please use a different (larger) value for 'k' in the control object."))
}
# Standardize X.train matrix for variables with Lasso or Group Lasso penalty (if standardize = TRUE)
# Standardize X.train matrix for predictors with Lasso or Group Lasso penalty (if standardize = TRUE)
list.stand <- .X.stand(X = X.train, standardize = attr(X, "standardize"), ind.stand = ind.stand,
n.par.cov = n.par.cov, weights = weights.train)
# Possibly standardized X.train matrix
......@@ -282,7 +282,7 @@
cl.lambda <- makeCluster(min(control$ncores, n.folds),
type = ifelse(.Platform$OS.type == "unix", "FORK", "PSOCK"), outfile = "")
# Export variables to use in cluster
# Export predictors to use in cluster
clusterExport(cl.lambda, varlist = c("n.folds", "y", "X", "family", "weights", "start",
"offset", "n.par.cov", "pen.cov", "ind.stand", "group.cov", "refcat.cov",
"lambda.vector", "lambda1", "lambda2",
......@@ -314,11 +314,11 @@
X.sds <- attr(X, "X.sds")
for (j in ind.stand) {
# Loop over variables with Lasso or Group Lasso penalty
# Loop over predictors with Lasso or Group Lasso penalty
# Index of first coefficient corresponding to this variable
# Index of first coefficient corresponding to this predictor
ind.start <- ifelse(j == 1, 1L, sum(unlist(n.par.cov[1:(j-1L)])) + 1L)
# Index of last coefficient corresponding to this variable
# Index of last coefficient corresponding to this predictor
ind.end <- sum(unlist(n.par.cov[1:j]))
# Indices
ind.s.e <- ind.start:ind.end
......
......@@ -31,7 +31,7 @@
###
# Vector of norms for Group Lasso
norms.group <- group.cov
# Compute norms per variable (for Group Lasso penalty)
# Compute norms per predictor (for Group Lasso penalty)
for (j in which(pen.cov == "grouplasso")) {
norms.group[j] <- sqrt(sum(beta.tilde.split[[j]]^2))
}
......@@ -42,10 +42,10 @@
if (length(groups.unique.nz) > 0) {
for (j in 1:length(groups.unique.nz)) {
# Indices of variables in the group
# Indices of predictors in the group
ind.group <- which(group.cov == groups.unique.nz[j])
# Combine norms in a group by summing the squared norms per variable
# For every variable in the group, the vector element is now equal to the norm of the group!
# Combine norms in a group by summing the squared norms per predictor
# For every predictor in the group, the vector element is now equal to the norm of the group!
norms.group[ind.group] <- sqrt(sum(norms.group[ind.group]^2))
}
}
......@@ -66,7 +66,7 @@
} else if (pen.cov[[j]] == "grouplasso") {
# Note that the norm of all coefficients in the group is used.
# For group 0 (i.e. no group), only the norm of the coefficients for variable j is used.
# For group 0 (i.e. no group), only the norm of the coefficients for predictor j is used.
return(.PO.GroupLasso(beta.tilde = beta.tilde.split[[j]],
slambda = diag(pen.mat.cov[[j]]) * lambda * step,
norm = norms.group[j]))
......@@ -101,7 +101,7 @@
# Create clusters, use forking on Unix platforms
cl <- makeCluster(po.ncores, type = ifelse(.Platform$OS.type == "unix", "FORK", "PSOCK"))
# Export variables to use in cluster
# Export predictors to use in cluster
clusterExport(cl, varlist = c("beta.tilde.split", "lambda", "lambda1", "lambda2", "step", "pen.mat.cov",
"pen.mat.cov.aux", "beta.old.split", "norms.group"),
envir = environment())
......
......@@ -34,11 +34,11 @@
} else if (pen.cov[[j]] == "grouplasso") {
if (group.cov[[j]] == 0) {
# Only for variables in group 0 (i.e. no group)
# Only for predictors in group 0 (i.e. no group)
pen <- lambda * sqrt(sum((pen.mat.cov[[j]] %*% beta)^2))
} else {
# Variables not in group 0 are treated later
# Predictors not in group 0 are treated later
pen <- 0
}
......@@ -65,12 +65,12 @@
# Loop over groups (except group 0)
for (j in 1:length(groups.unique.nz)) {
# Indices of variables in the group
# Indices of predictors in the group
ind.group <- which(group.cov == groups.unique.nz[j])
swn <- 0
for (l in ind.group) {
# Compute contribution of variable to squared weighted norm (swn) and add to swn
# Compute contribution of predictor to squared weighted norm (swn) and add to swn
swn <- swn + sum((pen.mat.cov[[l]] %*% beta.split[[l]])^2)
}
# Multiply weighted norm for group with lambda and add to total penalty
......
......@@ -5,27 +5,27 @@
###############################################
# Penalty matrix for Lasso penalized variable
# Penalty matrix for Lasso penalized predictor
#
# n.par: Number of parameters for the variable
# n.par: Number of parameters for the predictor
.pen.mat.lasso <- function(n.par) {
return(diag(n.par))
}
# Penalty matrix for Group Lasso penalized variable
# Penalty matrix for Group Lasso penalized predictor
#
# n.par: Number of parameters for the variable
# n.par: Number of parameters for the predictor
.pen.mat.grouplasso <- function(n.par) {
return(diag(n.par))
}
# Penalty matrix for Fused Lasso penalized variable
# Penalty matrix for Fused Lasso penalized predictor
#
# n.par: Number of parameters for the variable
# n.par: Number of parameters for the predictor
# refcat: Numeric indicating which of the levels is the reference category, default is first one
# when no reference category is present it is 0
.pen.mat.flasso <- function(n.par, refcat = 1) {
......@@ -43,9 +43,9 @@
}
# Penalty matrix for Generalized Fused Lasso penalized variable
# Penalty matrix for Generalized Fused Lasso penalized predictor
#
# n.par: Number of parameters for the variable
# n.par: Number of parameters for the predictor
# refcat: Logical which indicates if a reference category is present
.pen.mat.gflasso <- function(n.par, refcat = TRUE) {
......@@ -68,8 +68,8 @@
# Penalty matrix for 2D Fused Lasso penalty
#
# n.par.row: Number of parameters for the column variable
# n.par.col: Number of parameters for the row variable
# n.par.row: Number of parameters for the column predictor
# n.par.col: Number of parameters for the row predictor
.pen.mat.2dflasso <- function(n.par.row, n.par.col) {
total.columns <- n.par.row * n.par.col
......@@ -110,10 +110,10 @@
}
# Compute penalty matrix for Graph-Guided Fused Lasso penalized variable based on adjacency matrix
# Compute penalty matrix for Graph-Guided Fused Lasso penalized predictor based on adjacency matrix
#
# adj.matrix: Adjacency matrix of the graph
# lev.names: Names of the levels of the variable
# lev.names: Names of the levels of the predictor
# refcat: Numeric indicating which of the levels is the reference category, default is first one
# when no reference category is present it is 0
.pen.mat.ggflasso <- function(adj.matrix, lev.names = NULL, refcat = 1) {
......
......@@ -59,7 +59,7 @@
# Multiple elements of pen.weights have wrong length
} else if (length(ind) > 1) {
# Give variable names if not empty and number otherwise
# Give predictor names if not empty and number otherwise
if (is.null(names(ind))) {
tmp <- paste(as.character(ind), collapse=", ")
} else {
......@@ -86,7 +86,7 @@
# Multiple elements of pen.weights have wrong names
} else if (length(ind) > 1) {
# Give variable names if not empty and number otherwise
# Give predictor names if not empty and number otherwise
if (is.null(names(ind))) {
tmp <- paste(as.character(ind), collapse=", ")
} else {
......@@ -115,7 +115,7 @@
# offset: Vector containing the offset for the model
# family: Family object
# standardize: Logical indicating if the columns of X corresponding to Lasso or Group Lasso penalties
# ind.stand: Indices of variables with a Lasso or a Group Lasso penalty
# ind.stand: Indices of predictors with a Lasso or a Group Lasso penalty
# refcat: Indicator for reference category for Fused Lasso, Generalized Fused Lasso and Graph-Guided Fused Lasso
# n: Sample size
# lambda1: List with lambda1 per predictor. The penalty parameter for the L_1-penalty in Sparse (Generalized) Fused Lasso or Sparse Graph-Guided Fused Lasso is lambda*lambda1
......@@ -139,7 +139,7 @@
pen.weights <- n.par.cov
# Zero weight for non-penalized variables, hence no need to compute weights
# Zero weight for non-penalized predictors, hence no need to compute weights
# if only penalties of these types
if (!all(pen.cov == "none")) {
......@@ -224,11 +224,11 @@
if (pen.cov[[j]] == "grouplasso") {
if (group.cov[[j]] == 0) {
# Only for variables in group 0 (i.e. no group)
# Only for predictors in group 0 (i.e. no group)
pen.weights[[j]] <- as.numeric(1 / sqrt(sum((pen.mat[[j]] %*% beta.weights.split[[j]])^2)))
} else {
# Variables not in group 0 are treated later
# Predictors not in group 0 are treated later
pen.weights[[j]] <- 0
}
......@@ -256,12 +256,12 @@
# Loop over groups (except group 0)
for (j in 1:length(groups.unique.nz)) {
# Indices of variables in the group
# Indices of predictors in the group
ind.group <- which(group.cov == groups.unique.nz[j])
swn <- 0
for (l in ind.group) {
# Compute contribution of variable to squared weighted norm (swn) and add to swn
# Compute contribution of predictor to squared weighted norm (swn) and add to swn
swn <- swn + sum((pen.mat[[l]] %*% beta.weights.split[[l]])^2)
}
......@@ -276,7 +276,7 @@
} else {
# No need for standardization since zero weight for non-penalized variables
# No need for standardization since zero weight for non-penalized predictors
pen.weights.stand <- FALSE
}
......
......@@ -69,7 +69,7 @@
if (is.factor(data.gam[, cov.name])) {
# Categorical variable
# Categorical predictor
if (any(is.na(suppressWarnings(as.numeric(levels(data.gam[, cov.name])))))) {
# Non-numeric factor levels
......@@ -81,7 +81,7 @@
}
} else {
# Lasso or Group Lasso with continuous variable
# Lasso or Group Lasso with continuous predictor
pen.cov.gam <- paste0(pen.cov.gam, ".numeric")
}
}
......@@ -98,7 +98,7 @@
stop("GAM penalty weights do not work when a non-numeric factor has a Fused Lasso penalty and lambda1 > 0 or lambda2 > 0. Please use GLM penalty weights instead.")
}
# Add variables to formula
# Add predictors to formula
if (pen.cov.gam %in% c("none.numeric", "none.factor.nonnumeric",
"lasso.numeric", "lasso.factor.nonnumeric",
"grouplasso.numeric", "grouplasso.factor.nonnumeric",
......@@ -137,9 +137,9 @@
# Remove ".binned" if present
cov.names <- gsub(".binned*", "", cov.names)
# Check if cov.names exist as variable names
# Check if cov.names exist as predictor names
if (any(!cov.names %in% colnames(data.gam))) {
stop("Binned factors for interaction should have the original variable name + '.binned' as variable names, e.g. 'age.binned.")
stop("Binned factors for interaction should have the original predictor name + '.binned' as predictor names, e.g. 'age.binned.")
}
# TODO: add this to help file
......@@ -191,11 +191,11 @@
# Index of reference class, only used if refcat=TRUE
ref.ind <- which(data.gam[, cov.name] == levels(data.gam[, cov.name])[1])[1]
# Check if cov.names exist as variable names
# Check if cov.names exist as predictor names
if (!all(cov.names %in% colnames(data.gam))) {
warning(paste0("Latitude and longitude values for 'ggflasso' should have the original variable name",
" + '.lat' and '.lon' as variable names, e.g. 'zip.lat' and 'zip.lon. Since they are not given, ",
"a non-smooth effect is used for variable ", deparse(substitute(cov.name)), "."))
warning(paste0("Latitude and longitude values for 'ggflasso' should have the original predictor name",
" + '.lat' and '.lon' as predictor names, e.g. 'zip.lat' and 'zip.lon. Since they are not given, ",
"a non-smooth effect is used for predictor ", deparse(substitute(cov.name)), "."))
if (!refcat) {
stop("GAM penalty weights do not work for 'ggflasso' and lambda1 > 0 or lambda2 > 0 when no latitude and longitude are given. Please use GLM penalty weights instead.")
......@@ -268,7 +268,7 @@
pen.cov.gam[[j]] <- attr(mf_nodrop[, j], "penalty")
}
# Add variables to formula
# Add predictors to formula
if (pen.cov.gam[[j]] %in% c("none", "lasso", "grouplasso")) {
if (is.factor(data.gam[, cov.name])) {
......@@ -277,13 +277,13 @@
if (any(is.na(suppressWarnings(as.numeric(levels(data.gam[, cov.name])))))) {
# No numeric levels
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms", terms = cov.name)
} else {
# Numeric levels
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms",
terms = paste0("s(", cov.name, ".numer)"))
}
......@@ -333,13 +333,13 @@
if (any(is.na(suppressWarnings(as.numeric(levels(data.gam[, cov.name])))))) {
# No numeric levels
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms", terms = cov.name)
} else {
# Numeric levels
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms",
terms = paste0("s(", cov.name, ".numer)"))
}
......@@ -357,7 +357,7 @@
} else if (pen.cov.gam[[j]] == "gflasso") {
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms", terms = cov.name)
# Get coefficient per level (except reference level)
......@@ -380,14 +380,14 @@
# Remove ".binned" if present
cov.names <- gsub(".binned*", "", cov.names_orig)
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms",
terms = paste0("s(", cov.names[1], ".numer,", cov.names[2], ".numer):dummy.",
cov.names[1], ".", cov.names[2]))
# Levels of first (binned) variable
# Levels of first (binned) predictor
lev.1 <- levels(data.gam[, cov.names_orig[1]])
# Levels of second (binned) variable
# Levels of second (binned) predictor
lev.2 <- levels(data.gam[, cov.names_orig[2]])
ind2 <- 1L
......@@ -404,15 +404,15 @@
} else if (pen.cov.gam[[j]] == "ggflasso") {
# Check if exist as variable names
# Check if exist as predictor names
if (!all(c(paste0(cov.name, ".lat"), paste0(cov.name, ".lon")) %in% colnames(data.gam))) {
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms", terms = cov.name)
} else {
# Predict GAM model for current variable
# Predict GAM model for current predictor
pg <- predict.gam(gam.fit.pw, newdata = data.gam, type = "terms",
terms = paste0("s(", cov.name, ".lat,", cov.name, ".lon)"))
......
......@@ -189,7 +189,7 @@
stop("'n.par.cov' and 'pen.cov' should be lists of equal length.")
}
# Index of variables with 2D fused Lasso penalty
# Index of predictors with 2D fused Lasso penalty
ind2d <- which(pen.cov == "2dflasso")
if (length(ind2d) > 0) {
......@@ -222,7 +222,7 @@
n.par.cov.2dflasso <- NULL
}
# Index of variables without 2D fused Lasso penalty
# Index of predictors without 2D fused Lasso penalty
ind.non.2d <- which(pen.cov != "2dflasso")
if (length(ind.non.2d) > 0) {
......@@ -425,10 +425,10 @@
adj.matrix <- list()
}
# Indices of variables with ggflasso penalty
# Indices of predictors with ggflasso penalty
ind.adj <- which(pen.cov == "ggflasso")
# Transform to list if adjacency matrix is not given as a list and only one variable with ggflasso penalty
# Transform to list if adjacency matrix is not given as a list and only one predictor with ggflasso penalty
if (length(ind.adj) == 1 & !is.list(adj.matrix)) {
adj.matrix <- list(adj.matrix)
names(adj.matrix)[1] <- names(pen.cov)[ind.adj]
......@@ -466,13 +466,13 @@
stop("A numeric matrix or element of class Matrix was expected in element ", i, " of 'adj.matrix'.")
}
# Check if list element of adj.matrix has correct number of rows (number of levels of the corresponding variable)
# Check if list element of adj.matrix has correct number of rows (number of levels of the corresponding predictor)
if ((n.par.cov[[ind.adj[i]]] + refcat) != nrow(adj.matrix[[i]])) {
stop("Element ", i, " of 'adj.matrix' needs to be a square matrix with ", (n.par.cov[[ind.adj[i]]] + refcat),
" rows (the number of levels of the corresponding variable).")
" rows (the number of levels of the corresponding predictor).")
}
# Level names (without reference level if refcat=TRUE) of variable
# Level names (without reference level if refcat=TRUE) of predictor
lev.names <- gsub(names(pen.cov)[ind.adj[i]], "", unlist(col.names.split[ind.adj[i]]))
# Row names of adjacency matrix
spat.names <- rownames(adj.matrix[[i]])
......@@ -483,7 +483,7 @@
}
if (!isTRUE(all.equal(as.character(lev.names), spat.names))) {
stop("The rownames of element ", i, " of 'adj.matrix' are not the same as the level names of the corresponding variable.",
stop("The rownames of element ", i, " of 'adj.matrix' are not the same as the level names of the corresponding predictor.",
" Note that the order of the names is also important.")
}
}
......
......@@ -35,15 +35,15 @@
#' \item{3}{Low step size (i.e. below 1e-14).}
#' }}
#' \item{final.stepsize}{Final step size used in the algorithm.}
#' \item{n.par.cov}{List with number of parameters to estimate per covariate (predictor).}
#' \item{pen.cov}{List with penalty type per covariate (predictor).}
#' \item{group.cov}{List with group of each covariate (predictor) for Group Lasso where 0 means no group.}
#' \item{refcat.cov}{List with number of the reference category in the original order of the levels of each covariate (predictor) where 0 indicates no reference category.}
#' \item{n.par.cov}{List with number of parameters to estimate per predictor (covariate).}
#' \item{pen.cov}{List with penalty type per predictor (covariate).}
#' \item{group.cov}{List with group of each predictor (covariate) for Group Lasso where 0 means no group.}
#' \item{refcat.cov}{List with number of the reference category in the original order of the levels of each predictor (covariate) where 0 indicates no reference category.}
#' \item{control}{The used control list, see \code{\link{glmsmurf.control}}.}
#' Optionally, following elements are also included:
#' \item{X}{The model matrix, only returned when the argument \code{x.return} in \code{\link{glmsmurf}} or \code{\link{glmsmurf.fit}} is \code{TRUE}.}
#' \item{y}{The response vector, only returned when the argument \code{y.return} in \code{\link{glmsmurf}} or \code{\link{glmsmurf.fit}} is \code{TRUE}.}
#' \item{pen.weights}{List with the vector of penalty weights per covariate (predictor), only returned when the argument \code{pen.weights.return} in \code{\link{glmsmurf}} or \code{\link{glmsmurf.fit}} is \code{TRUE}.}
#' \item{pen.weights}{List with the vector of penalty weights per predictor (covariate), only returned when the argument \code{pen.weights.return} in \code{\link{glmsmurf}} or \code{\link{glmsmurf.fit}} is \code{TRUE}.}
#' When the model is re-estimated, i.e. \code{reest = TRUE} in \code{\link{glmsmurf.control}},
#' the following components are also present:
#' \item{glm.reest}{Output from the call to \code{\link[speedglm]{speedglm}} or \code{\link[stats]{glm}} to fit the re-estimated model.}
......
......@@ -51,7 +51,7 @@
#' \item \code{"gam"} (ad. GAM penalty weights),
#' \item \code{"gam.stand"} (stand. ad. GAM penalty weights);
#' }
#' or a list with the penalty weight vector per covariate (predictor). This list should have length equal to the number of predictors and predictor names as element names.
#' or a list with the penalty weight vector per predictor. This list should have length equal to the number of predictors and predictor names as element names.
#' @param adj.matrix A named list containing the adjacency matrices (a.k.a. neighbor matrices) for each of the predictors with a Graph-Guided Fused Lasso penalty.
#' The list elements should have the names of the corresponding predictors. If only one predictor has a Graph-Guided Fused Lasso penalty,
#' it is also possible to only give the adjacency matrix itself (not in a list).
......@@ -59,7 +59,7 @@
#' The returned coefficients are always on the original (i.e. non-standardized) scale.
#' @param x.return Logical indicating if the used model matrix should be returned in the output object, default is \code{FALSE}.
#' @param y.return Logical indicating if the used response vector should be returned in the output object, default is \code{TRUE}.
#' @param pen.weights.return Logical indicating if the list of the used penalty weight vector per covariate (predictor) should be returned in the output object, default is \code{FALSE}.
#' @param pen.weights.return Logical indicating if the list of the used penalty weight vector per predictor should be returned in the output object, default is \code{FALSE}.
#' @param control A list of parameters used in the fitting process. This is passed to \code{\link{glmsmurf.control}}.
#' @return An object of class \code{"glmsmurf"} is returned. See \code{\link{glmsmurf-class}} for more details about this class and its generic functions.
#'
......@@ -212,7 +212,7 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
# Check for unused factor levels
if (is.factor(mf_nodrop[, j])) {
if (!isTRUE(all.equal(levels(mf_nodrop[, j]), levels(mf[, j])))) {
stop(paste0("No unused levels can be present in the variable '", cov.names[j + (inter - 1)],"'.",
stop(paste0("No unused levels can be present in the predictor '", cov.names[j + (inter - 1)],"'.",
" Please remove the unused factor levels."))
}
}
......@@ -230,14 +230,14 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
}
} else if (pen.cov[[j + (inter - 1)]] %in% c("lasso", "grouplasso")) {
# No reference category for Lasso and Group Lasso penalized variables,
# No reference category for Lasso and Group Lasso penalized predictors,
# this is imposed through contrast.args
# Only use contrasts if a factor
if (is.factor(mf[, j])) {
name <- colnames(mf)[j]
# Set contrasts to FALSE for variable => no reference category
# Note that the list element has the same name as the variable
# Set contrasts to FALSE for predictor => no reference category
# Note that the list element has the same name as the predictor
contrasts[[name]] <- contrasts(mf[, j], contrasts = FALSE)
}
......@@ -250,8 +250,8 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
# No reference category, this is imposed through contrast.args
name <- colnames(mf)[j]
# Set contrasts to FALSE for variable => no reference category
# Note that the list element has the same name as the variable
# Set contrasts to FALSE for predictor => no reference category
# Note that the list element has the same name as the predictor
contrasts[[name]] <- contrasts(mf[, j], contrasts = FALSE)
# Number of parameters to estimate
......@@ -272,7 +272,7 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
# Check if 1D effects (or non-binned 1D effects) are also included
error2d <- paste0(" should also be included in the model ",
"since this variable (or a binned version) is present in an interaction. ",
"since this predictor (or a binned version) is present in an interaction. ",
"Please mind that 1D effects need to be included in the formula before 2D effects!")
......@@ -289,16 +289,16 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
# Levels of pred2
l2 <- levels(data[, attr(mf_nodrop[, j], "cov.names")[[2]]])
# Check that reference category is not changed for both 1D variables (or their non-binned versions)
# Check that reference category is not changed for both 1D predictors (or their non-binned versions)
n1 <- gsub(".binned*", "", attr(mf_nodrop[, j], "cov.names")[[1]])
if (refcat.cov[[which(cov.names == n1)]] > 1) {
stop(paste0("The reference category for the variable '", n1,
stop(paste0("The reference category for the predictor '", n1,
" 'cannot be changed as it (or its binned version) is included in a 2D effect."))
}
n2 <- gsub(".binned*", "", attr(mf_nodrop[, j], "cov.names")[[2]])
if (refcat.cov[[which(cov.names == n2)]] > 1) {
stop(paste0("The reference category for the variable '", n2,
stop(paste0("The reference category for the predictor '", n2,
" 'cannot be changed as it (or its binned version) is included in a 2D effect."))
}
......@@ -339,7 +339,7 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
names(pen.cov) <- names(n.par.cov) <- names(group.cov) <- names(refcat.cov) <- cov.names
if (anyDuplicated(cov.names) > 0) {
stop("A variable can only be included once in the formula (except in a 2D Fused Lasso).")
stop("A predictor can only be included once in the formula (except in a 2D Fused Lasso).")
}
#########
......@@ -418,7 +418,7 @@ glmsmurf <- function(formula, family, data, weights, start, offset, lambda, lamb
if (inter_bool) {
# Combine two variable names
# Combine two predictor names
col.names[j] <- gsub(",\\s*", ".", col.names[j])
# Add "inter."
col.names[j] <- paste0("inter.", col.names[j])
......
......@@ -12,15 +12,15 @@
#' @param X Only for \code{glmsmurf.fit}: the design matrix including ones for the intercept. A \code{n} by \code{(p+1)} matrix which can
#' be of numeric matrix class (\code{\link[methods:StructureClasses]{matrix-class}}) or of class Matrix (\code{\link[Matrix]{Matrix-class}}) including sparse matrix class (\code{\link[Matrix]{dgCMatrix-class}}).
#' @param y Only for \code{glmsmurf.fit}: the response vector, a numeric vector of size \code{n}.
#' @param pen.cov Only for \code{glmsmurf.fit}: list with penalty type per covariate (predictor). A named list of strings with covariate (predictor) names as element names.
#' @param pen.cov Only for \code{glmsmurf.fit}: list with penalty type per predictor (covariate). A named list of strings with predictor names as element names.
#' Possible types: \code{"none"} (no penalty, e.g. for intercept), \code{"lasso"}, \code{"grouplasso"},
#' \code{"flasso"} (Fused Lasso), \code{"gflasso"} (Generalized Fused Lasso),
#' \code{"2dflasso"} (2D Fused Lasso) or \code{"ggflasso"} (Graph-Guided Fused Lasso).
#' @param n.par.cov Only for \code{glmsmurf.fit}: list with number of parameters to estimate per covariate (predictor). A named list of strictly positive integers with covariate (predictor) names as element names.
#' @param group.cov Only for \code{glmsmurf.fit}: list with group of each covariate (predictor) which is only used for the Group Lasso penalty.
#' A named list of positive integers with covariate names as element names where 0 means no group.
#' @param refcat.cov Only for \code{glmsmurf.fit}: list with number of the reference category in the original order of the levels of each covariate (predictor).
#' When the covariate (predictor) is not a factor or no reference category is present, it is equal to 0. This number will only be taken into account for a Fused Lasso, Generalized Fused Lasso or Graph-Guided Fused Lasso penalty
#' @param n.par.cov Only for \code{glmsmurf.fit}: list with number of parameters to estimate per predictor (covariate). A named list of strictly positive integers with predictor names as element names.
#' @param group.cov Only for \code{glmsmurf.fit}: list with group of each predictor (covariate) which is only used for the Group Lasso penalty.
#' A named list of positive integers with predictor names as element names where 0 means no group.
#' @param refcat.cov Only for \code{glmsmurf.fit}: list with number of the reference category in the original order of the levels of each predictor (covariate).
#' When the predictor is not a factor or no reference category is present, it is equal to 0. This number will only be taken into account for a Fused Lasso, Generalized Fused Lasso or Graph-Guided Fused Lasso penalty
#' when a reference category is present.
glmsmurf.fit <- function(X, y, weights, start, offset, family, pen.cov, n.par.cov, group.cov, refcat.cov,
lambda, lambda1 = 0, lambda2 = 0, pen.weights, adj.matrix, standardize = TRUE, control = list(),
......@@ -273,15 +273,15 @@ glmsmurf.fit <- function(X, y, weights, start, offset, family, pen.cov, n.par.co
######################################
# Standardize
# Indices of variables with Lasso or Group Lasso penalty
# Indices of predictors with Lasso or Group Lasso penalty
ind.stand <- which(pen.cov %in% c("lasso", "grouplasso"))
# Standardize X matrix for variables with Lasso or Group Lasso penalty (if standardize = TRUE)
# Standardize X matrix for predictors with Lasso or Group Lasso penalty (if standardize = TRUE)
list.stand <- .X.stand(X = X, standardize = standardize, ind.stand = ind.stand, n.par.cov = n.par.cov, weights = weights)
# Possibly standardized X matrix
X <- list.stand$X
# Set attribute indicating if the variables for Lasso and Group Lasso are standardized
# Set attribute indicating if the predictors for Lasso and Group Lasso are standardized
attr(X, "standardize") <- standardize
# Add X.means and X.sds as attributes of X
attr(X, "X.means") <- list.stand$X.means
......@@ -305,7 +305,7 @@ glmsmurf.fit <- function(X, y, weights, start, offset, family, pen.cov, n.par.co
# No change needed if reference category is not the first level since order does not matter!
gflasso = .pen.mat.gflasso(n.par.cov[[j]], refcat = refcat),
"2dflasso" = if(length(n.par.cov.2dflasso[[j]]) > 1) {
# Second and third element are number of levels from 1D variables minus 1
# Second and third element are number of levels from 1D predictors minus 1
.pen.mat.2dflasso(n.par.cov.2dflasso[[j]][2], n.par.cov.2dflasso[[j]][3])
} else {
.pen.mat.2dflasso(sqrt(n.par.cov.2dflasso[[j]]), sqrt(n.par.cov.2dflasso[[j]]))
......@@ -357,7 +357,7 @@ glmsmurf.fit <- function(X, y, weights, start, offset, family, pen.cov, n.par.co
for (j in 1:n.cov) {
pen.mat[[j]] <- pen.mat[[j]] * pen.weights[[j]]
# Compute inverse of pen.mat for ordinal variables
# Compute inverse of pen.mat for ordinal predictors
if (pen.cov[[j]] == "flasso") {
pen.mat.transform[[j]] <- t(upper.tri(pen.mat[[j]], diag=TRUE) * (0 * pen.mat[[j]] + 1) / pen.weights[[j]])
......
......@@ -155,12 +155,12 @@
}
######
# Indices of variables with penalty different from 2dflasso
# Indices of predictors with penalty different from 2dflasso
ind <- which(pen.cov %in% c("lasso", "grouplasso", "flasso", "gflasso", "ggflasso"))
if (length(ind) > 0) {
# Split beta.new vector based on variables
# Split beta.new vector based on predictors
beta.new.split <- split(beta.new, rep(1:n.cov, n.par.cov))
nms <- names(beta.new)
......@@ -175,7 +175,7 @@
if (!(pen.cov[[j]] %in% c("flasso", "gflasso", "ggflasso") &
(lambda * lambda1.orig == 0 & lambda * lambda2.orig == 0))) {
# If all coefficients for the variable are non-zero, and there is more than one coefficient,
# If all coefficients for the predictor are non-zero, and there is more than one coefficient,
# problems arise due to multicollinearity. This is solved by setting the first coefficient
# (and all other coefficients which are equal to this one) to 0 to make the first (or the chosen) category the reference category
ind.ref <- ifelse(refcat.cov[[j]] == 0, 1L, refcat.cov[[j]])
......@@ -188,7 +188,7 @@
}
if (warn > 0) {
warning("Reference classes are introduced for some variable(s) in the re-estimated model.")
warning("Reference classes are introduced for some predictor(s) in the re-estimated model.")
}
# Transform back to beta vector and use original names
# Use new vector such that output for beta.new is not altered!
......@@ -229,7 +229,7 @@
# Re-estimated coefficients per group
beta.reest <- glm.reest$coefficients
beta.reest.long <- rep(0, length(beta.new2))
# Make new coefficient vector with re-estimated coefficients per variable
# Make new coefficient vector with re-estimated coefficients per predictor
for (i in 1:length(beta.reduced)) {
par <- beta.reduced[i]
beta.reest.long[which(beta.new2 == par)] <- beta.reest[i]
......@@ -245,7 +245,7 @@
###########
# Transform X and beta and beta.reest.long to original scale
# Logical indicating if the variables for Lasso and Group Lasso are standardized
# Logical indicating if the predictors for Lasso and Group Lasso are standardized
if (!is.null(attr(X, "standardize"))) {
standardize <- attr(X, "standardize")
......@@ -254,7 +254,7 @@
standardize <- FALSE
}
# Indices of variables with Lasso or Group Lasso penalty
# Indices of predictors with Lasso or Group Lasso penalty
ind.stand <- which(pen.cov %in% c("lasso", "grouplasso"))
if (standardize & length(ind.stand) > 0) {
......@@ -267,11 +267,11 @@
X.sds <- attr(X, "X.sds")
for (j in ind.stand) {
# Loop over variables with Lasso or Group Lasso penalty
# Loop over predictors with Lasso or Group Lasso penalty
# Index of first coefficient corresponding to this variable
# Index of first coefficient corresponding to this predictor
ind.start <- ifelse(j == 1, 1L, sum(unlist(n.par.cov[1:(j-1L)])) + 1L)