...
 
Commits (4)
......
@@ -7,7 +7,7 @@ r-base:
image: r-base
script:
- apt-get update -qq
-  - apt-get install -y --no-install-recommends libcurl4-openssl-dev libnetcdf-dev libssh2-1-dev libssl-dev libudunits2-dev libxml2-dev git
+  - apt-get install -y --no-install-recommends libcurl4-openssl-dev libssh2-1-dev libssl-dev libxml2-dev git
- Rscript -e 'install.packages(c("devtools", "roxygen2", "testthat"), repos = "https://cloud.r-project.org/")'
- Rscript -e 'devtools::install_deps()'
- Rscript -e 'devtools::check()'
......
Package: claut
Type: Package
Title: Functions from the University of Toronto Climate Lab
-Version: 0.1.2
-Date: 2017-03-24
+Version: 0.1.3
+Date: 2018-03-19
Authors@R: c(person(given = c("Conor", "I."), family = "Anderson",
role = c("aut","cre"), email = "conor.anderson@mail.utoronto.ca"),
role = c("aut","cre"), email = "conor.anderson@utoronto.ca"),
person(given = c("William", "A."), family = "Gough", role = "ths",
email = "gough@utsc.utoronto.ca"))
-Maintainer: Conor I. Anderson <conor.anderson@mail.utoronto.ca>
+Maintainer: Conor I. Anderson <conor.anderson@utoronto.ca>
Description: Collection of functions developed at the University of
Toronto Climate Lab, often stylized as "Climate Lab at University of Toronto,
CL@UT".
Depends:
-R (>= 3.1.0),
-PCICt
+R (>= 3.1.0)
Imports:
doParallel,
dplyr,
foreach,
gdata,
magrittr,
ncdf4,
ncdf4.helpers,
iterators,
parallel,
readr,
stats,
storr,
tibble,
utils,
zoo
......
# Generated by roxygen2: do not edit by hand
export(calc_anoms)
export(dataEliminator)
export(dataEliminatorMassive)
export(dataEliminatorThorough)
export(deltaDTD)
export(ensemble_means)
export(gcm_anomalies)
export(generate_wkt_csv)
export(get_gcm_files)
export(missing_data_lab)
export(parse_ASCII_grid)
export(trimData)
importFrom(dplyr,arrange)
importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(doParallel,registerDoParallel)
importFrom(dplyr,bind_rows)
importFrom(dplyr,group_by)
importFrom(dplyr,group_indices)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(dplyr,summarize_all)
importFrom(foreach,"%do%")
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
importFrom(gdata,unmatrix)
importFrom(magrittr,"%>%")
importFrom(ncdf4,nc_close)
importFrom(ncdf4,nc_open)
importFrom(ncdf4,ncvar_get)
importFrom(ncdf4.helpers,get.split.filename.cmip5)
importFrom(ncdf4.helpers,nc.get.time.series)
importFrom(ncdf4.helpers,nc.get.var.subset.by.axes)
importFrom(iterators,icount)
importFrom(parallel,detectCores)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(readr,read_table)
importFrom(readr,write_csv)
importFrom(stats,aggregate)
importFrom(stats,sd)
importFrom(storr,storr_rds)
importFrom(tibble,add_column)
importFrom(tibble,as_tibble)
importFrom(tibble,has_name)
importFrom(tibble,tibble)
importFrom(utils,count.fields)
importFrom(utils,read.csv)
......
#' Calculate the anomaly values for a table produced by gcm_anomalies()
#'
#' @param datin a \code{tbl_df} produced by gcm_anomalies().
#'
#' @importFrom dplyr filter
#' @importFrom tibble has_name
#' @export
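#' @examples
#' \dontrun{
#' ## A hedged sketch: build the table with gcm_anomalies(), leaving
#' ## calc_anom = FALSE, then compute the anomalies ourselves.
#' dat <- gcm_anomalies(getwd(), get_gcm_files(getwd(), 1971:2000),
#' -7.023833, -76.465222, 0, 1971:2000, calc_anom = FALSE)
#' calc_anoms(dat)
#' }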
calc_anoms <- function(datin) {
dat_proj <- filter(datin, Scenario != "historical")
dat_hist <- filter(datin, Scenario == "historical")
if (has_name(datin, "Ensembles")) {
stop("It seems you already averaged the ensembles. This function needs to be run first.")
}
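# Column 5 holds the baseline mean. Copy the historical baseline into each
# matching projection row, then subtract it from every projection period column.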
for (i in 1:nrow(dat_hist)) {
dat_proj[dat_proj$Model == dat_hist$Model[i] & dat_proj$Variable == dat_hist$Variable[i] & dat_proj$Ensemble == dat_hist$Ensemble[i], 5] <- unlist(dat_hist[i, 5])
for (col in 6:ncol(dat_proj)) {
rows <- which(dat_proj$Model == dat_hist$Model[i] & dat_proj$Variable == dat_hist$Variable[i] & dat_proj$Ensemble == dat_hist$Ensemble[i])
for (row in rows) {
dat_proj[row, col] <- dat_proj[row, col] - dat_hist[i, 5]
}
}
}
dat_proj[!is.na(dat_proj[[5]]),]
}
#' Eliminate daily values from a monthly data set
#'
#' @param month data.frame; a data.frame with a single year-month of data with no missing values
#' @param csv character; path to a .csv file containing a single year-month of data with no missing values
#' @param NAs numeric; a vector of the number of NA values to test
#' @param sampling character; the type of sampling to use: (r)andom, (c)onsecutive
#' @param variables character; the names of the variables to test (we will try to auto-identify the column number)
#' @param simplify Boolean; whether to return simplified results
#' @param interpolate Boolean; whether to use linear interpolation to approximate the missing values
#'
#' @importFrom stats sd
#' @importFrom utils read.csv
#' @importFrom zoo na.approx
#'
#' @export
#'
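#' @examples
#' \dontrun{
#' ## A hypothetical run: "january.csv" stands in for a file holding one
#' ## complete year-month of daily data.
#' jan <- read.csv("january.csv")
#' dataEliminator(month = jan, NAs = c(3, 5, 10), sampling = "r")
#' }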
dataEliminator <- function(month, csv, NAs, sampling = "z", variables = c("max", "min", "mean"), simplify = FALSE, interpolate = FALSE) {
## First, I hold the user's hand, making sure that we have all of the information we need to perform the test.
# Set up the data.frame `month`
if (missing(month)) {
if (missing(csv)) csv <- readline(prompt = "Please specify csv file. ")
month <- read.csv(csv)
}
# Get the range of `k` and sampling type
if (missing(NAs)) NAs <- as.numeric(readline(prompt = "Enter vector of values of `k` to test: "))
while (sampling != "c" & sampling != "r") {
sampling <- readline(prompt = "Do you want NAs to be generated (r)andomly, or (c)onsecutively? ")
if (sampling != "c" & sampling != "r") print("You must choose either (c) or (r).")
}
for (var in seq_along(variables)) {
variables[var] <- as.numeric(grep(variables[var], names(month), ignore.case = TRUE))
}
variables <- as.numeric(variables)
# We make an empty data frame where we will store our results
df <- data.frame(stringsAsFactors = FALSE)
# Now we start an outer loop, for each value of `k`
for (k in NAs) {
# First we generate our random data points that will be nullified
if (sampling == "r") {
NAvec <- sample(1:nrow(month), size = k, replace = FALSE)
}
if (sampling == "c") {
NAvec <- sample(1:(nrow(month)+1-k), size = 1, replace = FALSE)
NAvec <- seq(NAvec, NAvec-1+k)
}
# We do the nullifying
monthMod <- month
monthMod[NAvec,] <- NA
# Now we start an inner loop that does all the calculations and builds the results table
for (var in variables) {
colname <- colnames(month[var])
mean <- mean(month[[var]], na.rm = TRUE)
SD <- sd(month[[var]], na.rm = TRUE)
meanMod <- mean(monthMod[[var]], na.rm = TRUE)
deltaMean <- abs(meanMod - mean)
prop <- deltaMean/SD
if (interpolate) {
monthApprox <- na.approx(monthMod[[var]])
prop <- c(prop, abs(mean(monthApprox, na.rm = TRUE) - mean)/SD)
}
# Then we stick it all together in a table.
row <- c(colname, k, mean, SD, meanMod, deltaMean, prop)
df <- rbind(df, row, stringsAsFactors = FALSE)
}
}
if (interpolate) {
names(prop) <- c("Mean.Error", "Mean.Error.Approx")
names(df) <- c("Variable", "No.NA", "Mean", "StDev", "Mod.Mean", "Abs.Diff", "Mean.Error", "Mean.Error.Approx")
} else {
names(df) <- c("Variable", "No.NA", "Mean", "StDev", "Mod.Mean", "Abs.Diff", "Mean.Error")
}
# If we chose the simplify option, we just get the binary result. This is useful for running the test a massive number of times, but only for a single variable. See the appendix!
if (simplify) {
return(prop)
} else {
return(df)
}
}
#' Perform the mass elimination of daily values from a monthly data set
#'
#' @param numberTests integer; the number of times to repeat the test
#' @param month data.frame; a data.frame with a single year-month of data with no missing values
#' @param csv character; path to a .csv file containing a single year-month of data with no missing values
#' @param NAs numeric; a vector of the number of NA values to test
#' @param sampling character; the type of sampling to use: (r)andom, (c)onsecutive
#' @param variable character; the name of the variable to test (we will try to auto-identify the column number)
#' @param verbose Boolean; whether the function should be verbose
#' @param interpolate Boolean; whether to use linear interpolation to approximate the missing values
#'
#' @importFrom utils read.csv
#'
#' @export
#'
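#' @examples
#' \dontrun{
#' ## A hypothetical run: `jan` stands in for one complete year-month of
#' ## daily data; repeat the test 1000 times for each value of `k`.
#' jan <- read.csv("january.csv")
#' dataEliminatorMassive(numberTests = 1000, month = jan, NAs = c(3, 5, 10),
#' sampling = "c", variable = "mean")
#' }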
dataEliminatorMassive <- function(numberTests, month, csv, NAs, sampling = "z", variable = c("max", "min", "mean"), verbose = FALSE, interpolate = FALSE) {
#source("dataEliminator.R")
# We again hold the user's hand to come up with all the info we need
if (missing(numberTests)) numberTests <- as.numeric(readline(prompt = "How many times should I repeat the test? "))
if (missing(month) && missing(csv)) csv <- readline(prompt = "Please specify csv file. ")
if (missing(month) && !missing(csv)) month <- read.csv(csv)
if (missing(NAs)) NAs <- as.numeric(readline(prompt = "Enter vector of values of `k` to test: "))
while (sampling != "c" & sampling != "r") {
sampling <- readline(prompt = "Do you want NAs to be generated (r)andomly, or (c)onsecutively? ")
if (sampling != "c" & sampling != "r") print("You must choose either (c) or (r).")
}
while (length(variable) > 1 || (variable != "mean" && variable != "max" && variable != "min")) {
print("Please choose the variable to test.")
variable <- readline(prompt = "Enter one of \"max\", \"min\", or \"mean\". ")
}
# We create an empty data frame to put our results
df <- data.frame(stringsAsFactors = FALSE)
colname <- colnames(month[grep(variable, names(month), ignore.case = TRUE)])
# We loop through the values of `k`
for (k in NAs) {
if (verbose == TRUE) print(paste("Running", numberTests, "tests with", k, "NAs."))
# We now run the original dataEliminator script, `numberTests` times, using the `simplify` argument.
result <- replicate(numberTests, dataEliminator(month = month, NAs = k, sampling = sampling, variables = variable, simplify = TRUE, interpolate = interpolate))
# Then we make a table of the results, proportion of tests passed overall, and broken down by test type.
if (interpolate) {
sta <- c(mean(result[1,]), mean(result[2,]))
} else {
sta <- summary(result)
}
row <- c(colname, k, numberTests, sta)
df <- rbind(df, row, stringsAsFactors = FALSE)
}
if (interpolate) {
names(df) <- c("Variable", "No.NA", "No.Reps", "Mean.Error", "Mean.Error.Approx")
} else {
names(df) <- c("Variable", "No.NA", "No.Reps", "Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max")
}
return(df)
}
#' Perform the mass elimination of consecutive daily values from a monthly data set
#'
#' @param month data.frame; a data.frame with a single year-month of data with no missing values
#' @param csv character; path to a .csv file containing a single year-month of data with no missing values
#' @param NAs numeric; a vector of the number of NA values to test
#' @param variables character; the names of the variables to test (we will try to auto-identify the column number)
#' @param simplify Boolean; whether to return simplified results
#' @param interpolate Boolean; whether to use linear interpolation to approximate the missing values
#'
#' @importFrom stats sd
#' @importFrom utils read.csv
#' @importFrom zoo na.approx
#'
#' @export
#'
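#' @examples
#' \dontrun{
#' ## A hypothetical run: test every possible position of 5 consecutive
#' ## missing values in the month `jan`.
#' jan <- read.csv("january.csv")
#' dataEliminatorThorough(month = jan, NAs = 5, variables = "mean")
#' }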
dataEliminatorThorough <- function(month, csv, NAs, variables = c("max", "min", "mean"), simplify = FALSE, interpolate = FALSE) {
## This version of the function is for consecutive sampling, and will test all possible
## permutations of a month with `k` missing values.
## First, I hold the user's hand, making sure that we have all of the information we need
## to perform the test.
# Set up the data.frame `month`
if (missing(month) && missing(csv)) csv <- readline(prompt = "Please specify csv file. ")
if (missing(month) && !missing(csv)) month <- read.csv(csv)
# Get the range of `k` and sampling type
if (missing(NAs)) NAs <- as.numeric(readline(prompt = "Enter vector of values of `k` to test: "))
# Find out which variables we are interested in
for (var in seq_along(variables)) {
variables[var] <- as.numeric(grep(variables[var], names(month), ignore.case = TRUE))
}
variables <- as.numeric(variables)
# We make an empty data frame where we will store our results
df2 <- df3 <- data.frame(stringsAsFactors = FALSE)
# Now we start an outer loop, for each value of `k`
for (k in NAs) {
df <- data.frame(stringsAsFactors = FALSE)
# Now we start an inner loop, which will loop through dates from the 1st to the (n+1-k)th
# Note, please ignore the fact that my loop function variables are different letters than above!
for (days in 1:(nrow(month) + 1 - k)) {
# Choose which days to eliminate
NAvec <- seq(days, days - 1 + k)
# We do the nullifying
monthMod <- month
monthMod[NAvec,] <- NA
# Now we start an inner loop that does all the calculations and builds the results table
for (var in variables) {
colname <- colnames(month[var])
month[[var]] <- as.numeric(month[[var]])
mean <- mean(month[[var]], na.rm = TRUE)
SD <- sd(month[[var]], na.rm = TRUE)
meanMod <- mean(monthMod[[var]], na.rm = TRUE)
deltaMean <- abs(meanMod - mean)
prop <- deltaMean/SD
if (interpolate) {
monthApprox <- na.approx(monthMod[[var]])
prop <- c(prop, abs(mean(monthApprox, na.rm = TRUE) - mean)/SD)
}
# Then we stick it all together in a table.
row <- c(colname, k, NAvec[1], NAvec[length(NAvec)], mean, SD, meanMod, deltaMean, prop)
df <- rbind(df, row, stringsAsFactors = FALSE)
}
}
if (interpolate) {
names(df) <- c("Variable", "No.NA", "StartDate", "EndDate", "Mean", "StDev", "Mod.Mean", "Abs.Diff", "Mean.Error", "Mean.Error.Approx")
} else {
names(df) <- c("Variable", "No.NA", "StartDate", "EndDate", "Mean", "StDev", "Mod.Mean", "Abs.Diff", "Mean.Error")
}
df3 <- rbind(df3, df)
if (interpolate) {
names(df3) <- c("Variable", "No.NA", "StartDate", "EndDate", "Mean", "StDev", "Mod.Mean", "Abs.Diff", "Mean.Error", "Mean.Error.Approx")
} else {
names(df3) <- c("Variable", "No.NA", "StartDate", "EndDate", "Mean", "StDev", "Mod.Mean", "Abs.Diff", "Mean.Error")
}
# Yet another loop (Optimization be damned!) that takes the summary stats for each variable, in
# case we want a simple summary.
for (var in variables) {
colname <- colnames(month[var])
index <- which(df$Variable == colname)
if (interpolate) {
sta <- c(mean(as.numeric(df$Mean.Error[index])), mean(as.numeric(df$Mean.Error.Approx[index])))
} else {
sta <- summary(as.numeric(df$Mean.Error[index]))
}
row <- c(colname, k, length(1:(nrow(month)+1-k)), sta)
df2 <- rbind(df2, row, stringsAsFactors = FALSE)
}
if (interpolate) {
names(df2) <- c("Variable", "No.NA", "No.Reps", "Mean.Error", "Mean.Error.Approx")
} else {
names(df2) <- c("Variable", "No.NA", "No.Reps", "Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max")
}
}
# If we chose the simplify option, we just get the summary results.
if (simplify) {
return(df2)
} else {
return(df3)
}
}
#' Calculate the average of all ensembles for the same Variable/Model/Scenario
#'
#' @param datin a \code{tbl_df} produced by gcm_anomalies().
#'
#' @importFrom dplyr group_by left_join select summarize summarize_all
#' @importFrom magrittr %>%
#' @export
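#' @examples
#' \dontrun{
#' ## A hedged sketch: average the ensembles once the anomalies have been
#' ## calculated (`dat` as returned by calc_anoms()).
#' ensemble_means(dat)
#' }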
ensemble_means <- function(datin) {
ensembles <- datin %>% group_by(Variable, Model, Scenario) %>% summarize(Ensembles = list(unique(Ensemble)))
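# Drop the Ensemble column (column 4) before taking the mean of the remaining columns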
summaries <- datin %>% select(-4) %>% group_by(Variable, Model, Scenario) %>% summarize_all(mean)
datout <- left_join(ensembles, summaries)
datout
}
#' Generate table GCM anomaly data
#'
#' @param dir character; path to the directory containing the \code{.nc} files you wish to process
#' @param files character; list of filenames to process. Any list item containing more than one filename will be processed sequentially.
#' @param lat numeric; the latitude of the target cell
#' @param lon numeric; the longitude of the target cell
#' @param timeslice numeric; 0-16: annual (0), monthly (1-12), seasonal (13-16)
#' @param baseline the baseline against which to check the data (defaults to 1971:2000)
#' @param start projection start year (defaults to 2011)
#' @param simple_names Boolean; whether to simplify the projection interval names (i.e. 2020s instead of 2011-2040)
#' @param calc_anom Boolean; whether to calculate anomaly values.
#' @param ensemble_average Boolean; whether to average different ensembles for the same variable/model/scenario.
#' @param cache_dir character; a directory to cache results (defaults to NULL, i.e. no cache.)
#'
#' @return a class \code{data.frame}
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr arrange bind_rows filter group_by summarize
#' @importFrom ncdf4 nc_close nc_open ncvar_get
#' @importFrom ncdf4.helpers get.split.filename.cmip5 nc.get.time.series nc.get.var.subset.by.axes
#' @importFrom storr storr_rds
#' @importFrom tibble as_tibble tibble
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @importFrom zoo as.yearmon
#'
#' @examples
#' \dontrun{annual <- gcm_anomalies(getwd(), get_gcm_files(getwd(), 1971:2000), -7.023833, -76.465222, 0, 1971:2000, calc_anom = TRUE)}
gcm_anomalies <- function(dir = getwd(), files, lat, lon, timeslice = 0, baseline, start = 2011, simple_names = FALSE, calc_anom = TRUE, ensemble_average = TRUE, cache_dir = NULL) {
use_storr <- !is.null(cache_dir)
if (use_storr) {
sto <- storr_rds(cache_dir)
}
## Generate projection periods based on baseline length and start year
number_of_periods <- (2100 - start + 1) %/% length(baseline)
proj <- list()
period_names <- rep(NA, number_of_periods)
for (per in 1:number_of_periods) {
st <- start + (length(baseline) * (per - 1))
ed <- start + (length(baseline) * per) - 1
period <- list(st:ed)
period_names[per] <- if(simple_names) paste0(mean(unlist(period)) %/% 10 * 10, "s") else paste0(st, "-", ed)
proj <- c(proj, period)
}
## Prepare a table with the right dimensions
dat <- as_tibble(matrix(NA, nrow = length(files), ncol = 5 + number_of_periods))
# Process each NetCDF file
print("Processing the NetCDF files...")
## Set up a progress Bar
prog <- txtProgressBar(min = 0, max = length(files), style = 3)
on.exit(close(prog))
for (file_list in seq_along(files)) {
fs <- unlist(files[file_list])
for (f in 1:length(fs)) {
components <- get.split.filename.cmip5(file.path(dir, fs[f]))
var <- unname(components['var'])
model <- unname(components['model'])
scenario <- unname(components['emissions'])
ensemble <- unname(components['run'])
nc_nc <- nc_open(file.path(dir, fs[f]))
if (f == 1) {
# Get the bounds of each cell
lon_bnds <- try(ncvar_get(nc_nc, "lon_bnds"), silent = TRUE)
if (inherits(lon_bnds, "try-error")) {
lon_bnds <- ncvar_get(nc_nc, "lon_bounds")
}
lat_bnds <- try(ncvar_get(nc_nc, "lat_bnds"), silent = TRUE)
if (inherits(lat_bnds, "try-error")) {
lat_bnds <- ncvar_get(nc_nc, "lat_bounds")
}
# Convert Western longitudes to degrees East
lon <- ifelse(lon < 0, 360 + lon, lon)
# Get the grid cell we are interested in
lat_index <- which(lat_bnds[1,] <= lat & lat_bnds[2,] >= lat)
lon_index <- which(lon_bnds[1,] <= lon & lon_bnds[2,] >= lon)
rm(lat_bnds, lon_bnds); gc()
}
# Check the cache
key <- paste(fs[f], paste(lat_index, collapse = "-"), paste(lon_index, collapse = "-"), sep = "_")
time_tmp <- NULL
if (use_storr) {
if (sto$exists(key)) {
print("Hit the cache")
time_tmp <- sto$get(key)
}
}
if (is.null(time_tmp)) {
print("Missed the cache")
# Get the time
nc_time <- nc.get.time.series(nc_nc, v = var, time.dim.name = "time")
nc_time <- as.yearmon(format(nc_time, format = "%Y-%m-%d hh:mm:ss"))
# Now load only that grid data
nc_var <- nc.get.var.subset.by.axes(nc_nc, var, axis.indices = list(X = lon_index, Y = lat_index))
# Close the nc connection
nc_close(nc_nc); rm(nc_nc)
if (dim(nc_var)[1] > 1 || dim(nc_var)[2] > 1) {
warning("We got two latitud or longitude cells. We'll average across them.")
nc_var <- apply(nc_var, 3, mean)
} else {
nc_var <- nc_var[1, 1, ]
}
if (var %in% c("tas", "tasmax", "tasmin")) {
nc_var <- nc_var - 273.15
} else {
stop("Sorry, for now I only work with temperature!")
}
time_tmp <- tibble(Time = nc_time, Year = format(as.yearmon(Time), format = "%Y"),
Month = format(as.yearmon(Time), format = "%m"),
Var = nc_var)
rm(nc_var, nc_time); gc()
if(use_storr) sto$set(key, time_tmp)
}
if (f == 1) {
time_series <- time_tmp
} else {
time_series <- suppressWarnings(bind_rows(time_series, time_tmp))
}
}
if (timeslice > 0 & timeslice < 13) {
time_series <- filter(time_series, Month == sprintf("%02d", timeslice))
} else if (timeslice > 12 & timeslice < 17) {
if (timeslice == 13) {
time_series <- filter(time_series, Month %in% sprintf("%02d", c(1, 2, 12)))
time_series$Year[time_series$Month == sprintf("%02d", 12)] <- as.numeric(time_series$Year[time_series$Month == sprintf("%02d", 12)]) + 1
}
if (timeslice == 14) {
time_series <- filter(time_series, Month %in% sprintf("%02d", c(3, 4, 5)))
}
if (timeslice == 15) {
time_series <- filter(time_series, Month %in% sprintf("%02d", c(6, 7, 8)))
}
if (timeslice == 16) {
time_series <- filter(time_series, Month %in% sprintf("%02d", c(9, 10, 11)))
}
}
result <- time_series %>% group_by(Year) %>% summarize(Mean = mean(Var))
result$Year <- as.numeric(result$Year)
periods <- rep(NA, length(proj))
if (scenario == "historical") {
baseln <- mean(result$Mean[result$Year >= min(baseline) & result$Year <= max(baseline)])
} else {
baseln <- NA
for (i in seq_along(proj)) {
periods[i] <- mean(result$Mean[result$Year >= min(proj[[i]]) & result$Year <= max(proj[[i]])])
}
}
col_names <- c("Variable", "Model", "Scenario", "Ensemble", paste0("Baseline ", min(baseline), "-", max(baseline)), period_names)
row <- list(var, model, scenario, ensemble, baseln)
row <- c(row, periods)
names(row) <- col_names
dat[file_list,] <- row
colnames(dat) <- col_names
setTxtProgressBar(prog, value = file_list)
}
# Compute additional metrics
if (calc_anom) dat <- calc_anoms(dat)
if (ensemble_average) dat <- ensemble_means(dat)
# Return our tibble
dat %>% arrange(Model, Scenario, Ensemble)
}
#' Generate a list of GCM files to pass to \code{gcm_anomalies()}
#'
#' @param dir character; the directory containing the files.
#' @param baseline numeric; a vector of years that should be covered by history (\code{NULL} to ignore)
#' @param projection numeric; a vector of years that should be covered by the models (\code{NULL} to ignore)
#' @param scenario character; the scenario to filter on (optional)
#' @param ensemble character; the ensemble to filter on (optional)
#'
#' @return a list of filenames
#'
#' @importFrom tibble add_column as_tibble
#' @importFrom dplyr count filter group_by
#' @importFrom magrittr %>%
#' @importFrom ncdf4.helpers get.split.filename.cmip5
#' @importFrom zoo as.yearmon
#'
#' @export
#'
#' @examples
#' \dontrun{get_gcm_files(getwd(), 1971:2000, scenario = c("rcp85", "rcp45", "rcp26", "rcp60"), ensemble = "r1i1p1")}
get_gcm_files <- function(dir, baseline = NULL, projection = NULL, scenario = NULL, ensemble = NULL) {
# If we want both the baseline and the projections, we'll run this twice, once for each period.
if (!is.null(baseline) && !is.null(projection)) {
baseline_files <- get_gcm_files(dir, baseline, NULL, scenario, ensemble)
projection_files <- get_gcm_files(dir, NULL, projection, scenario, ensemble)
return(append(baseline_files, projection_files))
}
if (is.null(baseline)) period <- projection
if (is.null(projection)) {
period <- baseline
# There are no experiments in the past.
scenario <- "historical"
}
# Get the available files
filenames <- dir(dir)
choices <- as_tibble(t(sapply(filenames, get.split.filename.cmip5)))
names(choices) <- c("Variable", "Period", "Model", "Scenario", "Ensemble", "Range", "Start", "End")
choices <- choices %>% add_column(Filenames = filenames)
# Narrow down our choices
filtered <- choices
if (!is.null(scenario)) filtered <- filtered %>% filter(Scenario %in% scenario)
if (!is.null(ensemble)) filtered <- filtered %>% filter(Ensemble %in% ensemble)
# A lot of useful model output is split into multiple files
multi_models <- filtered %>% group_by(Scenario, Ensemble) %>% count(Model) %>% filter(n > 1)
groupings <- rep(list(NA), nrow(multi_models))
for (row in seq_len(nrow(multi_models))) {
tab <- filtered %>% filter(Model == multi_models$Model[row], Scenario == multi_models$Scenario[row], Ensemble == multi_models$Ensemble[row])
# If any file for this model gives us all the data we need, we'll skip this row
if (nrow(tab[tab$Start <= paste0(period[1], "01") & tab$End >= paste0(period[length(period)], "12"),]) > 0) {
next
}
# If the data starts too late or ends too soon, we'll skip this row
if (max(tab$End) < paste0(period[length(period)], "12")) {
next
}
if (min(tab$Start) > paste0(period[1], "01")) {
next
}
# If each file starts one year-month after the previous, we're happy!
for (file in seq((max(which(tab$Start <= paste0(period[1], "01"))) + 1), min(which(tab$End >= paste0(period[length(period)], "12"))))) {
if (as.yearmon(tab$Start[file], format = "%Y%m") == (as.yearmon(tab$End[file - 1], format = "%Y%m") + 1/12)) {
if (is.na(groupings[row])) {
groupings[row] <- list(c(tab$Filenames[file - 1], tab$Filenames[file]))
} else {
groupings[row] <- list(c(unlist(groupings[row]), tab$Filenames[file]))
}
} else {
groupings[row] <- NA
}
}
}
# Spit out the groupings, generated above, as well as any file that has all the data we need.
append(groupings[!is.na(groupings)], filtered$Filenames[filtered$Start <= paste0(period[1], "01") & filtered$End >= paste0(period[length(period)], "12")])
}
#' Eliminate daily values within months of a data set and report the results
#'
#' @param monthin data.frame; a data.frame with a single year-month of data with no missing values
#' @param NAs numeric; a vector of the number of NA values to test
#' @param no_of_tests integer; the number of tests to run; for consecutive sampling, tops out at N - k + 1
#' @param sampling character; the type of sampling to use: (r)andom, (c)onsecutive
#' @param variables character; the names of the variables to test (we will try to auto-identify the column number)
#' @param cores numeric; the number of cores to parallelize over. Defaults to \code{parallel::detectCores() - 1}
#'
#' @importFrom dplyr bind_rows
#' @importFrom iterators icount
#' @importFrom foreach %do% %dopar% foreach
#' @importFrom doParallel registerDoParallel
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom stats sd
#' @importFrom utils read.csv setTxtProgressBar txtProgressBar
#' @importFrom zoo as.yearmon na.approx na.spline
#'
#' @export
#'
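#' @examples
#' \dontrun{
#' ## A hypothetical run: 100 random-elimination tests for each `k` in
#' ## 1:10, parallelized over two cores. `jan` stands in for one complete
#' ## year-month of daily data.
#' jan <- read.csv("january.csv")
#' res <- missing_data_lab(jan, NAs = 1:10, sampling = "r",
#' no_of_tests = 100, cores = 2)
#' res$summary
#' }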
missing_data_lab <- function(monthin, NAs, sampling = "z", no_of_tests = 1, variables = c("max", "min", "mean"), cores) {
# Get the range of `k` and sampling type
if (missing(NAs)) NAs <- as.numeric(readline(prompt = "Enter vector of values of `k` to test: "))
while (sampling != "c" & sampling != "r") {
sampling <- readline(prompt = "Do you want NAs to be generated (r)andomly, or (c)onsecutively? ")
if (sampling != "c" & sampling != "r") print("You must choose either (c) or (r).")
}
for (var in seq_along(variables)) {
variables[var] <- grep(variables[var], names(monthin), ignore.case = TRUE)
}
variables <- as.numeric(variables)
# First we generate our random data points that will be nullified
NAvectors <- foreach(k=NAs, .combine = "append") %do% {
if (sampling == "r") {
NAvec <- replicate(no_of_tests, list(sample(1:nrow(monthin), size = k, replace = FALSE)))
}
if (sampling == "c") {
if (nrow(monthin) - k + 1 < no_of_tests) {
NAvec <- lapply(1:(nrow(monthin) - k + 1), function(x) x:(x - 1 + k))
} else {
samp_f <- function() {
samp <- sample(1:(nrow(monthin)+1-k), size = 1, replace = FALSE)
list(seq(samp, samp-1+k))
}
NAvec <- replicate(no_of_tests, samp_f())
}
}
NAvec
}
if (missing(cores)) {
cores <- detectCores() - 1
}
cl <- makeCluster(cores, outfile = "")
registerDoParallel(cl)
pb <- txtProgressBar(0, length(NAvectors), style = 3)
results <- foreach(i = icount(length(NAvectors)), .packages = c("dplyr", "foreach", "tibble", "reshape2", "zoo")) %dopar% {
NAs <- unlist(NAvectors[i])
month <- foreach(var=variables, .combine = rbind) %do% {
month <- monthin[,c(1, var)]
mod_var <- month[[2]]
mod_var[NAs] <- NA
month <- tibble(Date = month[[1]], var = colnames(month)[2], Intact = month[[2]], wMiss = mod_var, LinInt = na.approx(mod_var, na.rm = FALSE), SplInt = na.spline(mod_var))
month
}
summary <- month %>%
melt(id.vars = c("Date", "var"), variable.name = "treatment") %>%
group_by(yearmon = as.yearmon(Date, format = "%Y-%m-%d"), var, treatment) %>%
select(-`Date`) %>%
summarize_all(c(mean = mean.default, sd = sd), na.rm = TRUE) %>%
mutate(k = length(NAs), days = list(NAs))
summary <- summary %>%
left_join(., subset(summary, treatment == "Intact", select = c(-`treatment`, -`k`, -`days`)), by = c("yearmon", "var")) %>%
mutate(err = abs(mean.x - mean.y), prop = err / sd.y) %>%
select(yearmon, var, k, days, treatment, mean = mean.x, sd = sd.x, -`mean.y`, -`sd.y`, err, prop)
summary <- summary %>%
left_join(., subset(summary, treatment == "wMiss", select = c(-`treatment`, -`k`, -`days`)), by = c("yearmon", "var")) %>%
mutate(change_real = err.y - err.x, change_prop = prop.y - prop.x) %>%
select(yearmon, var, k, days, treatment, mean = mean.x, sd = sd.x, -`mean.y`, -`sd.y`, err = err.x, prop = prop.x, change_real, change_prop)
setTxtProgressBar(pb, i)
list(summary = summary, details = month)
}
stopCluster(cl)
suppressWarnings(list(summary = bind_rows(lapply(results, "[[", 1)), details = bind_rows(lapply(results, "[[", 2))))
}
......
@@ -10,7 +10,7 @@ knitr::opts_chunk$set(echo = TRUE)
## Functions from the University of Toronto Climate Lab
[![build status](https://gitlab.com/ConorIA/claut/badges/master/build.svg)](https://gitlab.com/ConorIA/claut/commits/master) [![Build status](https://ci.appveyor.com/api/projects/status/kioqnwa6xqs1f4j5?svg=true)](https://ci.appveyor.com/project/ConorIA/claut)
-This package, currently at a very early stage of development, will eventually host many of the functions generated at the University of Toronto Climate Lab. For now, I (Conor Anderson) am the sole maintainer of, and contributor to, this package, however, I hope that eventually all of the miscellaneous functions that are produced in the lab can find their way into this package. Any function contained in this package is documented. Type `?function_name` in R to access this documentation.
+This package, currently at a very early stage of development, will eventually host many of the functions generated at the University of Toronto Climate Lab. For now, I (Conor Anderson) am the sole maintainer of, and contributor to, this package; however, I hope that eventually all of the miscellaneous functions that are produced in the lab can find their way into this package. Any function contained in this package is documented. Type `?function_name` in R to access this documentation. I consider this to be a "working package"; the functions contained herein are likely to change and/or may be split away into other projects without warning.
## Installation
......
@@ -24,10 +24,8 @@ source("https://gitlab.com/ConorIA/claut/raw/master/install_claut.R")
The following functions were used in studies that are currently under review. If you are a reviewer, this is probably what you are looking for. Note that these functions may undergo some minor optimization or code changes, but the results that they produce will always be the same.
-- dataEliminator: This set of functions is used to artificially introduce missing values into monthly climate series. In its current state, there are three variations of the function. These are likely to be re-factored.
-1. [`dataEliminator()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminator.R): The base function that eliminates data for a single year-month of data
-2. [`dataEliminatorMassive()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorMassive.R): A helper function that calls the base function multiple times, e.g. 1000 repetitions of the same test
-3. [`dataEliminatorThorough()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorThorough.R): A modified version of the base function that performs consecutive elimination of all possible combinations of $k$ consecutive missing values
+- [`deltaDTD()`](https://gitlab.com/ConorIA/claut/blob/master/R/deltaDTD.R): Calculate a number of measures of diurnal temperature variability.
+- [`missing_data_lab()`](https://gitlab.com/ConorIA/claut/blob/master/R/missing_value_lab.R): A function to artificially introduce missing values into monthly climate series, replace those missing values with linear interpolation and cubic splines, and then calculate the error in the calculated mean.
## Misc functions
......
@@ -37,10 +35,10 @@ There are some other helper functions in this package that are here in hopes tha
- functions for working with ASCII gridded data; these were originally written to parse NOAA's [GHCN Merged gridded data set](https://www.ncdc.noaa.gov/temp-and-precip/ghcn-gridded-products/)
- [`parse_ASCII_grid()`](https://gitlab.com/ConorIA/claut/blob/master/R/parse_ASCII_grid.R): Reads an ASCII file of gridded data into a 3D matrix. This is currently quite slow and a little noisy, but it works.
- [`generate_wkt_csv()`](https://gitlab.com/ConorIA/claut/blob/master/R/generate_wkt_csv.R): This function takes a list of matrix names and generates a `.csv` file of WKT-formatted polygons for import into QGIS and other GIS software.
-- [`gcm_anomalies()`](https://gitlab.com/ConorIA/claut/blob/master/R/gcm_anomalies.R): This function processes NetCDF files downloaded from [ESGF-CoG](https://www.earthsystemcog.org/projects/cog/) to perform delta analyses. It has been tested with a few AR5 models for temperature variables (`tas`, `tasmax`, and `tasmin`).
## Other resources
+Some of the functions that _were_ contained in the package have been moved to my [Conjuntool project](https://gitlab.com/ConorIA/conjuntool).
For other resources from the climate lab and beyond, please see our [CL@UT Resources List](https://gitlab.com/ConorIA/claut-resources).
## Contributing to the `claut` package
......
claut
================
Functions from the University of Toronto Climate Lab
----------------------------------------------------
[![build status](https://gitlab.com/ConorIA/claut/badges/master/build.svg)](https://gitlab.com/ConorIA/claut/commits/master) [![Build status](https://ci.appveyor.com/api/projects/status/kioqnwa6xqs1f4j5?svg=true)](https://ci.appveyor.com/project/ConorIA/claut)
This package, currently at a very early stage of development, will eventually host many of the functions generated at the University of Toronto Climate Lab. For now, I (Conor Anderson) am the sole maintainer of, and contributor to, this package, however, I hope that eventually all of the miscellaneous functions that are produced in the lab can find their way into this package. Any function contained in this package is documented. Type `?function_name` in R to access this documentation.
Installation
------------
This package can be installed via the `install_git()` function in the `devtools` or `remotes` packages, or by running the following convenience script.
## Functions from the University of Toronto Climate Lab
[![build
status](https://gitlab.com/ConorIA/claut/badges/master/build.svg)](https://gitlab.com/ConorIA/claut/commits/master)
[![Build
status](https://ci.appveyor.com/api/projects/status/kioqnwa6xqs1f4j5?svg=true)](https://ci.appveyor.com/project/ConorIA/claut)
This package, currently at a very early stage of development, will
eventually host many of the functions generated at the University of
Toronto Climate Lab. For now, I (Conor Anderson) am the sole maintainer
of, and contributor to, this package; however, I hope that eventually
all of the miscellaneous functions that are produced in the lab can find
their way into this package. Any function contained in this package is
documented. Type `?function_name` in R to access this documentation. I
consider this to be a “working package”; the functions contained herein
are likely to change and/or may be split away into other projects
without warning.
## Installation
This package can be installed via the `install_git()` function in the
`devtools` or `remotes` packages, or by running the following
convenience script.
``` r
source("https://gitlab.com/ConorIA/claut/raw/master/install_claut.R")
```
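For example, a manual installation using `remotes` might look like this
(a minimal sketch, assuming only the repository URL above):

``` r
# install.packages("remotes") # if remotes is not yet installed
remotes::install_git("https://gitlab.com/ConorIA/claut.git")
```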
Functions used in papers in review
----------------------------------
The following functions were used in studies that are currently under review. If you are a reviewer, this is probably what you are looking for. Note that these functions may undergo some minor optimization or code changes, but the results that they produce will always be the same.
- dataEliminator: This set of functions is used to artificially introduce missing values into monthly climate series. In its current state, there are three variations of the function. These are likely to be re-factored.
1. [`dataEliminator()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminator.R): The base function that eliminates data for a single year-month of data
2. [`dataEliminatorMassive()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorMassive.R): A helper function that calls the base function multiple times, e.g. 1000 repetitions of the same test
3. [`dataEliminatorThorough()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorThorough.R): A modified version of the base function that performs consecutive elimination of all possible combinations of *k* consecutive missing values
Misc functions
--------------
There are some other helper functions in this package that are here in hopes that they prove useful to someone someday. These are:
- [`trim_data()`](https://gitlab.com/ConorIA/claut/blob/master/R/trimData.R): An easy function to trim a `data.frame` to given start and end years
- functions for working with ASCII gridded data; these were originally written to parse NOAA's [GHCN Merged gridded data set](https://www.ncdc.noaa.gov/temp-and-precip/ghcn-gridded-products/)
- [`parse_ASCII_grid()`](https://gitlab.com/ConorIA/claut/blob/master/R/parse_ASCII_grid.R): Reads an ASCII file of gridded data into a 3D matrix. This is currently quite slow and a little noisy, but it works.
- [`generate_wkt_csv()`](https://gitlab.com/ConorIA/claut/blob/master/R/generate_wkt_csv.R): This function takes a list of matrix names and generates a `.csv` file of WKT-formatted polygons for import into QGIS and other GIS software.
- [`gcm_anomalies()`](https://gitlab.com/ConorIA/claut/blob/master/R/gcm_anomalies.R): This function processes NetCDF files downloaded from [ESGF-CoG](https://www.earthsystemcog.org/projects/cog/) to perform delta analyses. It has been tested with a few AR5 models for temperature variables (`tas`, `tasmax`, and `tasmin`).
Other resources
---------------
For other resources from the climate lab and beyond, please see our [CL@UT Resources List](https://gitlab.com/ConorIA/claut-resources).
Contributing to the `claut` package
-----------------------------------
The members of the Climate Lab are, first and foremost, academics and researchers, not coders. If you would like to contribute code improvements or patches to any of the functions here, please feel free to open a [merge request](https://gitlab.com/ConorIA/claut/merge_requests). Please note that code you contribute will be attributed to you, and will be released under the terms of the [GNU GPLv3](https://gitlab.com/ConorIA/claut/blob/master/LICENSE.md).
## Functions used in papers in review
The following functions were used in studies that are currently under
review. If you are a reviewer, this is probably what you are looking
for. Note that these functions may undergo some minor optimization or
code changes, but the results that they produce will always be the
same.
- [`deltaDTD()`](https://gitlab.com/ConorIA/claut/blob/master/R/deltaDTD.R):
Calculate a number of measures of diurnal temperature
variability.
- [`missing_data_lab()`](https://gitlab.com/ConorIA/claut/blob/master/R/missing_value_lab.R):
A function to artificially introduce missing values into monthly
climate series, replace those missing values with linear
interpolation and cubic splines, and then calculate the error in the
calculated mean.
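A hedged sketch of a typical call (`jan` here is a hypothetical data
frame holding one complete year-month of daily observations):

``` r
jan <- read.csv("january.csv") # one complete year-month of daily data
res <- missing_data_lab(jan, NAs = 1:5, sampling = "r", no_of_tests = 100)
head(res$summary)
```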
## Misc functions
There are some other helper functions in this package that are here in
hopes that they prove useful to someone someday. These
are:
- [`trimData()`](https://gitlab.com/ConorIA/claut/blob/master/R/trimData.R):
An easy function to trim a `data.frame` to given start and end years
- functions for working with ASCII gridded data; these were originally
written to parse NOAA’s [GHCN Merged gridded data
set](https://www.ncdc.noaa.gov/temp-and-precip/ghcn-gridded-products/)
- [`parse_ASCII_grid()`](https://gitlab.com/ConorIA/claut/blob/master/R/parse_ASCII_grid.R):
Reads an ASCII file of gridded data into a 3D matrix. This is
currently quite slow and a little noisy, but it
works.
- [`generate_wkt_csv()`](https://gitlab.com/ConorIA/claut/blob/master/R/generate_wkt_csv.R):
This function takes a list of matrix names and generates a
`.csv` file of WKT-formatted polygons for import into QGIS and
other GIS software.
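For instance, a hypothetical call to `trimData()` might look like the
following (the argument names here are assumptions; see `?trimData` for
the real signature):

``` r
trimmed <- trimData(df, start = 1981, end = 2010)
```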
## Other resources
Some of the functions that *were* contained in the package have been
moved to my [Conjuntool project](https://gitlab.com/ConorIA/conjuntool).
For other resources from the climate lab and beyond, please see our
[CL@UT Resources List](https://gitlab.com/ConorIA/claut-resources).
## Contributing to the `claut` package
The members of the Climate Lab are, first and foremost, academics and
researchers, not coders. If you would like to contribute code
improvements or patches to any of the functions here, please feel free
to open a [merge
request](https://gitlab.com/ConorIA/claut/merge_requests). Please note
that code you contribute will be attributed to you, and will be released
under the terms of the [GNU
GPLv3](https://gitlab.com/ConorIA/claut/blob/master/LICENSE.md).
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calc_anoms.R
\name{calc_anoms}
\alias{calc_anoms}
\title{Calculate the anomaly values for a table produced by gcm_anomalies()}
\usage{
calc_anoms(datin)
}
\arguments{
\item{datin}{a \code{tbl_df} produced by gcm_anomalies().}
}
\description{
Calculate the anomaly values for a table produced by gcm_anomalies()
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataEliminatorMassive.R
\name{dataEliminatorMassive}
\alias{dataEliminatorMassive}
\title{Perform the mass elimination of daily values from a monthly data set}
\usage{
dataEliminatorMassive(numberTests, month, csv, NAs, sampling = "z",
variable = c("max", "min", "mean"), verbose = FALSE,
interpolate = FALSE)
}
\arguments{
\item{numberTests}{integer; the number of times to repeat the test}
\item{month}{data.frame; a data.frame with a single year-month of data with no missing values}
\item{csv}{character; path to a .csv file containing a single year-month of data with no missing values}
\item{NAs}{numeric; a vector of the number of NA values to test}
\item{sampling}{character; the type of sampling to use: (r)andom, (c)onsecutive}
\item{variable}{character; the name of the variable to test (we will try to auto-identify the column number)}
\item{verbose}{Boolean; whether the function should be verbose}
\item{interpolate}{Boolean; whether to use linear interpolation to approximate the missing values}
}
\description{
Perform the mass elimination of daily values from a monthly data set
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataEliminatorThorough.R
\name{dataEliminatorThorough}
\alias{dataEliminatorThorough}
\title{Perform the mass elimination of consecutive daily values from a monthly data set}
\usage{
dataEliminatorThorough(month, csv, NAs, variables = c("max", "min", "mean"),
simplify = FALSE, interpolate = FALSE)
}
\arguments{
\item{month}{data.frame; a data.frame with a single year-month of data with no missing values}
\item{csv}{character; path to a .csv file containing a single year-month of data with no missing values}
\item{NAs}{numeric; a vector of the number of NA values to test}
\item{variables}{character; the names of the variables to test (we will try to auto-identify the column number)}
\item{simplify}{Boolean; whether to return simplified results}
\item{interpolate}{Boolean; whether to use linear interpolation to approximate the missing values}
}
\description{
Perform the mass elimination of consecutive daily values from a monthly data set
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ensemble_means.R
\name{ensemble_means}
\alias{ensemble_means}
\title{Calculate the average of all ensembles for the same Variable/Model/Scenario}
\usage{
ensemble_means(datin)
}
\arguments{
\item{datin}{a \code{tbl_df} produced by gcm_anomalies().}
}
\description{
Calculate the average of all ensembles for the same Variable/Model/Scenario
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gcm_anomalies.R
\name{gcm_anomalies}
\alias{gcm_anomalies}
\title{Generate table GCM anomaly data}
\usage{
gcm_anomalies(dir = getwd(), files, lat, lon, timeslice = 0, baseline,
start = 2011, simple_names = FALSE, calc_anom = TRUE,
ensemble_average = TRUE, cache_dir = NULL)
}
\arguments{
\item{dir}{character; path to the directory containing the \code{.nc} files you wish to process}
\item{files}{character; list of filenames to process. Any list item containing more than one filename will be processed sequentially.}
\item{lat}{numeric; the latitude of the target cell}
\item{lon}{numeric; the longitude of the target cell}
\item{timeslice}{numeric; 0-16: annual (0), monthly (1-12), seasonal (13-16)}
\item{baseline}{the baseline against which to check the data (defaults to 1971:2000)}
\item{start}{projection start year (defaults to 2011)}
\item{simple_names}{Boolean; whether to simplify the projection interval names (i.e. 2020s instead of 2011-2040)}
\item{calc_anom}{Boolean; whether to calculate anomaly values.}
\item{ensemble_average}{Boolean; whether to average different ensembles for the same variable/model/scenario.}
\item{cache_dir}{character; a directory to cache results (defaults to NULL, i.e. no cache.)}
}
\value{
a class \code{data.frame}
}
\description{
Generate table GCM anomaly data
}
\examples{
\dontrun{annual <- gcm_anomalies(getwd(), get_gcm_files(getwd(), 1971:2000), -7.023833, -76.465222, 0, 1971:2000, calc_anom = TRUE)}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_gcm_files.R
\name{get_gcm_files}
\alias{get_gcm_files}
\title{Generate a list of GCM files to pass to \code{gcm_anomalies()}}
\usage{
get_gcm_files(dir, baseline = NULL, projection = NULL, scenario = NULL,
ensemble = NULL)
}
\arguments{
\item{dir}{character; the directory containing the files.}
\item{baseline}{numeric; a vector of years that should be covered by history (\code{NULL} to ignore)}
\item{projection}{numeric; a vector of years that should be covered by the models (\code{NULL} to ignore)}
\item{scenario}{character; the scenario to filter on (optional)}
\item{ensemble}{character; the ensemble to filter on (optional)}
}
\value{
a list of filenames
}
\description{
Generate a list of GCM files to pass to \code{gcm_anomalies()}
}
\examples{
\dontrun{get_gcm_files(getwd(), 1971:2000, scenario = c("rcp85", "rcp45", "rcp26", "rcp60"), ensemble = "r1i1p1")}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataEliminator.R
\name{dataEliminator}
\alias{dataEliminator}
\title{Eliminate daily values from a monthly data set}
\usage{
dataEliminator(month, csv, NAs, sampling = "z", variables = c("max", "min",
"mean"), simplify = FALSE, interpolate = FALSE)
}
\arguments{
\item{month}{data.frame; a data.frame with a single year-month of data with no missing values}
\item{csv}{character; path to a .csv file containing a single year-month of data with no missing values}
\item{NAs}{numeric; a vector of the number of NA values to test}
\item{sampling}{character; the type of sampling to use: (r)andom, (c)onsecutive}
\item{variables}{character; the names of the variables to test (we will try to auto-identify the column number)}
\item{simplify}{Boolean; whether to return simplified results}
\item{interpolate}{Boolean; whether to use linear interpolation to approximate the missing values}
}
\description{
Eliminate daily values from a monthly data set
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/missing_value_lab.R
\name{missing_data_lab}
\alias{missing_data_lab}
\title{Eliminate daily values within months of a data set and report the results}
\usage{
missing_data_lab(monthin, NAs, sampling = "z", no_of_tests = 1,
variables = c("max", "min", "mean"), cores)
}
\arguments{
\item{monthin}{data.frame; a data.frame with a single year-month of data with no missing values}
\item{NAs}{numeric; a vector of the number of NA values to test}
\item{sampling}{character; the type of sampling to use: (r)andom, (c)onsecutive}
\item{no_of_tests}{integer; the number of tests to run; for consecutive sampling, tops out at N - k + 1}
\item{variables}{character; the names of the variables to test (we will try to auto-identify the column number)}
\item{cores}{numeric; the number of cores to parallelize over. Defaults to \code{parallel::detectCores() - 1}}
}
\description{
Eliminate daily values within months of a data set and report the results
}