Commit 15ffa998 authored by Conor Anderson's avatar Conor Anderson

Add `describe_snaps` and use the `.data` pronoun.

parent 50e26b9c
Pipeline #26240619 passed with stages
in 15 minutes and 24 seconds
Package: claut
Type: Package
Title: Functions from the University of Toronto Climate Lab
Version: 0.1.4
Version: 0.1.5
Date: 2018-04-17
Authors@R: c(person(given = c("Conor", "I."), family = "Anderson",
role = c("aut","cre"), email = "conor.anderson@utoronto.ca"),
......@@ -22,6 +22,8 @@ Imports:
lpSolve,
parallel,
readr,
reshape2,
rlang,
stats,
tibble,
utils,
......
# Generated by roxygen2: do not edit by hand
export(deltaDTD)
export(describe_snaps)
export(generate_wkt_csv)
export(missing_data_lab)
export(parse_ASCII_grid)
export(subensemble)
export(trimData)
importFrom(doParallel,registerDoParallel)
importFrom(dplyr,"%>%")
importFrom(dplyr,bind_rows)
importFrom(dplyr,group_by)
importFrom(dplyr,group_indices)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(dplyr,summarize_all)
importFrom(foreach,"%do%")
importFrom(foreach,"%dopar%")
importFrom(foreach,foreach)
......@@ -24,6 +28,8 @@ importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(readr,read_table)
importFrom(readr,write_csv)
importFrom(reshape2,melt)
importFrom(rlang,.data)
importFrom(stats,aggregate)
importFrom(stats,sd)
importFrom(tibble,tibble)
......
......@@ -9,8 +9,9 @@
##' @author Conor I. Anderson
##'
##' @importFrom zoo as.yearmon as.yearqtr
##' @importFrom rlang .data
##' @importFrom stats aggregate sd
##' @importFrom dplyr mutate group_by group_indices select summarize
##' @importFrom dplyr %>% mutate group_by group_indices select summarize
##'
##' @export
##'
......@@ -33,20 +34,29 @@ deltaDTD <- function(datain, period = "z", max_NA = 0.2) {
if(period == "daily") return(dat)
# Calculate number of missing values
tests <- dat %>% group_by(Yearmon = as.yearmon(Date)) %>% summarize(Missing = (sum(is.na(deltaDTD))/sum(is.na(deltaDTD), !is.na(deltaDTD))))
tests <- dat %>% group_by(Yearmon = as.yearmon(.data$Date)) %>%
summarize(Missing = (sum(is.na(.data$deltaDTD)) / sum(is.na(.data$deltaDTD), !is.na(.data$deltaDTD))))
# Take note of those months that are missing more than 20% of the data
badrows <- which(tests$Missing > max_NA)
if(period == "monthly"){
# Now we stick everything together in a new data frame.
dat <- dat %>% group_by(Yearmon = as.yearmon(Date)) %>% summarize(Tmax = mean(MaxTemp, na.rm = TRUE), Tmin = mean(MinTemp, na.rm = TRUE), DTD_Tmax = mean(DTD_Tmax, na.rm = TRUE), DTD_Tmin = mean(DTD_Tmin, na.rm = TRUE), deltaDTD = mean(deltaDTD, na.rm = TRUE), SD_Tmax = sd(MaxTemp, na.rm = TRUE), SD_Tmin = sd(MinTemp, na.rm = TRUE))
dat <- dat %>% group_by(Yearmon = as.yearmon(.data$Date)) %>%
summarize(Tmax = mean(.data$MaxTemp, na.rm = TRUE),
Tmin = mean(.data$MinTemp, na.rm = TRUE),
DTD_Tmax = mean(.data$DTD_Tmax, na.rm = TRUE),
DTD_Tmin = mean(.data$DTD_Tmin, na.rm = TRUE),
deltaDTD = mean(.data$deltaDTD, na.rm = TRUE),
SD_Tmax = sd(.data$MaxTemp, na.rm = TRUE),
SD_Tmin = sd(.data$MinTemp, na.rm = TRUE))
# Wipe out the months with too much missing data.
dat[badrows,2:ncol(dat)] <- NA
dat[badrows, 2:ncol(dat)] <- NA
# Calculate G value and deltaDTD
dat <- mutate(dat, G_Tmax = DTD_Tmax / SD_Tmax, G_Tmin = DTD_Tmin / SD_Tmin)
dat <- mutate(dat, G_Tmax = .data$DTD_Tmax / .data$SD_Tmax,
G_Tmin = .data$DTD_Tmin / .data$SD_Tmin)
## Stop here for monthly data
if (period == "monthly") return(dat)
......@@ -54,29 +64,40 @@ deltaDTD <- function(datain, period = "z", max_NA = 0.2) {
} else {
# Wipe out the bad months
dat[!is.na(match(dat %>% group_indices(Yearmon = as.yearmon(Date)), badrows)),2:6] <- NA
dat[!is.na(match(dat %>% group_indices(Yearmon = as.yearmon(.data$Date)), badrows)),2:6] <- NA
dat <- mutate(dat, Year = as.integer(format(Date, format = "%Y")), Month = as.integer(format(Date, format = "%m")))
dat <- mutate(dat, Year = as.integer(format(.data$Date, format = "%Y")),
Month = as.integer(format(.data$Date, format = "%m")))
dat$Year[dat$Month == 12] <- dat$Year[dat$Month == 12] + 1
dat$Season[dat$Month == 1 | dat$Month == 2 | dat$Month == 12] <- 1
dat$Season[dat$Month == 3 | dat$Month == 4 | dat$Month == 5] <- 2
dat$Season[dat$Month == 6 | dat$Month == 7 | dat$Month == 8] <- 3
dat$Season[dat$Month == 9 | dat$Month == 10 | dat$Month == 11] <- 4
tests <- dat %>% group_by(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))) %>% summarize(Total = sum(is.na(deltaDTD), !is.na(deltaDTD)), Missing = (sum(is.na(deltaDTD))/Total))
tests <- dat %>% group_by(Yearqtr = as.yearqtr(paste(.data$Year, .data$Season, sep = "-"))) %>%
summarize(Total = sum(is.na(.data$deltaDTD), !is.na(.data$deltaDTD)),
Missing = (sum(is.na(.data$deltaDTD)) / .data$Total))
badrows <- c(which(tests$Total < 90), which(tests$Missing > max_NA))
## Take this route for seasonal data
if (period == "seasonal") {
# Now we stick everything together in a new data frame.
dat <- dat %>% group_by(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))) %>% summarize(Tmax = mean(MaxTemp, na.rm = TRUE), Tmin = mean(MinTemp, na.rm = TRUE), DTD_Tmax = mean(DTD_Tmax, na.rm = TRUE), DTD_Tmin = mean(DTD_Tmin, na.rm = TRUE), deltaDTD = mean(deltaDTD, na.rm = TRUE), SD_Tmax = sd(MaxTemp, na.rm = TRUE), SD_Tmin = sd(MinTemp, na.rm = TRUE))
dat <- dat %>% group_by(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))) %>%
summarize(Tmax = mean(.data$MaxTemp, na.rm = TRUE),
Tmin = mean(.data$MinTemp, na.rm = TRUE),
DTD_Tmax = mean(.data$DTD_Tmax, na.rm = TRUE),
DTD_Tmin = mean(.data$DTD_Tmin, na.rm = TRUE),
deltaDTD = mean(.data$deltaDTD, na.rm = TRUE),
SD_Tmax = sd(.data$MaxTemp, na.rm = TRUE),
SD_Tmin = sd(.data$MinTemp, na.rm = TRUE))
# Wipe out the months with too much missing data.
dat[badrows,2:ncol(dat)] <- NA
# Calculate G value
dat <- mutate(dat, G_Tmax = DTD_Tmax / SD_Tmax, G_Tmin = DTD_Tmin / SD_Tmin)
dat <- mutate(dat, G_Tmax = .data$DTD_Tmax / .data$SD_Tmax,
G_Tmin = .data$DTD_Tmin / .data$SD_Tmin)
## Stop here for seasonal data
return(dat)
......@@ -84,21 +105,34 @@ deltaDTD <- function(datain, period = "z", max_NA = 0.2) {
} else {
# Wipe out the bad quarters FIXME: Is there a way to vectorize this?
dat[!is.na(match(dat %>% group_indices(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))), badrows)),2:6] <- NA
dat[!is.na(match(dat %>%
group_indices(Yearqtr = as.yearqtr(paste(.data$Year,
.data$Season,
sep = "-"))),
badrows)),2:6] <- NA
# Take this route for annual data
tests <- dat %>% group_by(Year = format(Date, format = "%Y")) %>% summarize(Missing = (sum(is.na(deltaDTD))/sum(is.na(deltaDTD), !is.na(deltaDTD))))
tests <- dat %>% group_by(Year = format(.data$Date, format = "%Y")) %>%
summarize(Missing = (sum(is.na(.data$deltaDTD)) / sum(is.na(.data$deltaDTD), !is.na(.data$deltaDTD))))
badrows <- which(tests$Missing > max_NA)
# Now we stick everything together in a new data frame.
dat <- dat %>% group_by(Year = format(Date, format = "%Y")) %>% summarize(Tmax = mean(MaxTemp, na.rm = TRUE), Tmin = mean(MinTemp, na.rm = TRUE), DTD_Tmax = mean(DTD_Tmax, na.rm = TRUE), DTD_Tmin = mean(DTD_Tmin, na.rm = TRUE), deltaDTD = mean(deltaDTD, na.rm = TRUE), SD_Tmax = sd(MaxTemp, na.rm = TRUE), SD_Tmin = sd(MinTemp, na.rm = TRUE))
dat <- dat %>% group_by(Year = format(.data$Date, format = "%Y")) %>%
summarize(Tmax = mean(.data$MaxTemp, na.rm = TRUE),
Tmin = mean(.data$MinTemp, na.rm = TRUE),
DTD_Tmax = mean(.data$DTD_Tmax, na.rm = TRUE),
DTD_Tmin = mean(.data$DTD_Tmin, na.rm = TRUE),
deltaDTD = mean(.data$deltaDTD, na.rm = TRUE),
SD_Tmax = sd(.data$MaxTemp, na.rm = TRUE),
SD_Tmin = sd(.data$MinTemp, na.rm = TRUE))
# Wipe out the months with too much missing data.
dat[badrows,2:ncol(dat)] <- NA
# Calculate G value and deltaDTD
dat <- mutate(dat, G_Tmax = DTD_Tmax / SD_Tmax, G_Tmin = DTD_Tmin / SD_Tmin)
dat <- mutate(dat, G_Tmax = .data$DTD_Tmax / .data$SD_Tmax,
G_Tmin = .data$DTD_Tmin / .data$SD_Tmin)
## Stop here for annual data
return(dat)
......
#' Describe run-length-encoded events
#'
#' Given an object of class \code{rle}, prints a vector of the length of
#' \code{sum(run_lengths$lengths)}, with each length repeated. It is useful to
#' annotate continuous events in a data frame.
#'
#' If passed a function along with an additional vector. The function is applied
#' to the values of the vector when each event is \code{TRUE}.
#'
#' @param run_lengths an object of class \code{rle}, of some criteria for an "event"
#' @param threshold the minimum length of the events to consider (default: 2)
#' @param vec (optional) a vector of data for which \code{f} should be applied
#' @param f (optional) a function to apply to the values of \code{vec} where events are \code{TRUE}.
#' @param ... additional parameters to be passed to \code{f}
#'
#' @return a vector
#' @export
#'
#' @examples
#' # Create some "weather"
#' weather <- rnorm(1000)
#' # Get the lengths of "cold" events where the temperature is below zero for more than 3 days
#' describe_snaps(rle(weather < 0), 3)
describe_snaps <- function(run_lengths, threshold = 2, vec, f = NULL, ...) {
run_lengths$values[run_lengths$lengths < threshold] <- FALSE
if (!inherits(f, "function")) {
unname(unlist(apply(data.frame(lengths = run_lengths$lengths,
values = run_lengths$values),
1, function(x) {
if (!is.na(x[2]) & x[2]) {
rep(x[1], times = x[1])
} else {
rep(NA, times = x[1])
}
})))
} else {
unname(unlist(apply(data.frame(
lengths = run_lengths$lengths,
values = run_lengths$values,
index = 1:length(run_lengths[[1]])
),
1, function(x) {
if (!is.na(x[2]) & x[2]) {
pull <- c(rep(FALSE, sum(run_lengths$lengths[1:(x[3] - 1)])),
rep(TRUE, x[1]))
pull <-
c(pull, rep(FALSE, length(vec) - length(pull)))
rep(f(vec[pull]), x[1])
} else {
rep(NA, times = x[1])
}
})))
}
}
......@@ -7,11 +7,13 @@
#' @param variables character; the names of the variables to test (we will try to auto-idenify the column number)
#' @param cores numeric; the number of cores to parallize over. Defaults to \code{parallel::detectCores() - 1}
#'
#' @importFrom dplyr bind_rows
#' @importFrom dplyr %>% bind_rows left_join summarize_all
#' @importFrom iterators icount
#' @importFrom foreach %do% %dopar% foreach
#' @importFrom doParallel registerDoParallel
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom reshape2 melt
#' @importFrom rlang .data
#' @importFrom stats sd
#' @importFrom utils read.csv
#' @importFrom zoo na.approx
......@@ -59,7 +61,8 @@ missing_data_lab <- function(monthin, NAs, sampling = "z", no_of_tests = 1, vari
registerDoParallel(cl)
pb <- txtProgressBar(0, length(NAvectors), style = 3)
results <- foreach(i = icount(length(NAvectors)), .packages = c("dplyr", "foreach", "tibble", "reshape2", "zoo")) %dopar% {
results <- foreach(i = icount(length(NAvectors)),
.packages = c("dplyr", "foreach", "tibble", "reshape2", "rlang", "zoo")) %dopar% {
NAs <- unlist(NAvectors[i])
......@@ -73,19 +76,19 @@ missing_data_lab <- function(monthin, NAs, sampling = "z", no_of_tests = 1, vari
summary <- month %>%
melt(id.vars = c("Date", "var"), variable.name = "treatment") %>%
group_by(yearmon = as.yearmon(Date, format = "%Y-%m-%d"), var, treatment) %>%
group_by(yearmon = as.yearmon(.data$Date, format = "%Y-%m-%d"), var, treatment) %>%
select(-`Date`) %>%
summarize_all(c(mean = mean.default, sd = sd), na.rm = TRUE) %>%
mutate(k = length(NAs), days = list(NAs))
summary <- summary %>%
left_join(., subset(summary, treatment == "Intact", select = c(-`treatment`, -`k`, -`days`)), by = c("yearmon", "var")) %>%
mutate(err = abs(mean.x - mean.y), prop = err / sd.y) %>%
mutate(err = abs(.data$mean.x - .data$mean.y), prop = .data$err / .data$sd.y) %>%
select(yearmon, var, k, days, treatment, mean = mean.x, sd = sd.x, -`mean.y`, -`sd.y`, err, prop)
summary <- summary %>%
left_join(., subset(summary, treatment == "wMiss", select = c(-`treatment`, -`k`, -`days`)), by = c("yearmon", "var")) %>%
mutate(change_real = err.y - err.x, change_prop = prop.y - prop.x) %>%
mutate(change_real = .data$err.y - .data$err.x, change_prop = .data$prop.y - .data$prop.x) %>%
select(yearmon, var, k, days, treatment, mean = mean.x, sd = sd.x, -`mean.y`, -`sd.y`, err = err.x, prop = prop.x, change_real, change_prop)
setTxtProgressBar(pb, i)
......
......@@ -32,6 +32,7 @@ The following functions were used in studies that are currently under review. If
There are some other helper functions in this package that are here in hopes that they prove useful to someone someday. These are:
- [`describe_snaps()`](https://gitlab.com/ConorIA/claut/blob/master/R/describe_snaps.R): Describes the lengths or other characteristics of run-length-encoded events, such as cold snaps or heatwaves.
- [`trim_data()`](https://gitlab.com/ConorIA/claut/blob/master/R/trimData.R): An easy function to trim a `data.frame` to given start and end years
- functions for working with ASCII gridded data; these were originally written to parse NOAA's [GHCN Merged gridded data set](https://www.ncdc.noaa.gov/temp-and-precip/ghcn-gridded-products/)
- [`parse_ASCII_grid()`](https://gitlab.com/ConorIA/claut/blob/master/R/parse_ASCII_grid.R): Reads an ASCII file of gridded data into a 3D matrix. This is currently quite slow and a little noisy, but it works.
......
......@@ -58,6 +58,10 @@ There are some other helper functions in this package that are here in
hopes that they prove useful to someone someday. These
are:
- [`describe_snaps()`](https://gitlab.com/ConorIA/claut/blob/master/R/describe_snaps.R):
Describes the lengths or other characteristics of run-length-encoded
events, such as cold snaps or
heatwaves.
- [`trim_data()`](https://gitlab.com/ConorIA/claut/blob/master/R/trimData.R):
An easy function to trim a `data.frame` to given start and end years
- functions for working with ASCII gridded data; these were originally
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/describe_snaps.R
\name{describe_snaps}
\alias{describe_snaps}
\title{Describe run-length-encoded events}
\usage{
describe_snaps(run_lengths, threshold = 2, vec, f = NULL, ...)
}
\arguments{
\item{run_lengths}{an object of class \code{rle}, of some criteria for an "event"}
\item{threshold}{the minimum length of the events to consider (default: 2)}
\item{vec}{(optional) a vector of data for which \code{f} should be applied}
\item{f}{(optional) a function to apply to the values of \code{vec} where events are \code{TRUE}.}
\item{...}{additional parameters to be passed to \code{f}}
}
\value{
a vector
}
\description{
Given an object of class \code{rle}, prints a vector of the length of
\code{sum(run_lengths$lengths)}, with each length repeated. It is useful to
annotate continuous events in a data frame.
}
\details{
If passed a function along with an additional vector. The function is applied
to the values of the vector when each event is \code{TRUE}.
}
\examples{
# Create some "weather"
weather <- rnorm(1000)
# Get the lengths of "cold" events where the temperature is below zero for more than 3 days
describe_snaps(rle(weather < 0), 3)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment