Commit e4abd2cd by Conor Anderson

parent 5257ba2b
Pipeline #9388041 failed with stage
in 20 minutes and 48 seconds
 ... ... @@ -4,6 +4,7 @@ export(calc_anoms) export(dataEliminator) export(dataEliminatorMassive) export(dataEliminatorThorough) export(deltaDTD) export(ensemble_means) export(gcm_anomalies) export(generate_wkt_csv) ... ... @@ -15,7 +16,9 @@ importFrom(RNetCDF,utcal.nc) importFrom(RNetCDF,var.get.nc) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,group_indices) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(dplyr,summarize) importFrom(dplyr,summarize_all) ... ... @@ -23,6 +26,7 @@ importFrom(gdata,unmatrix) importFrom(magrittr,"%>%") importFrom(readr,read_table) importFrom(readr,write_csv) importFrom(stats,aggregate) importFrom(stats,sd) importFrom(tibble,as_tibble) importFrom(tibble,tibble) ... ... @@ -32,4 +36,5 @@ importFrom(utils,read.csv) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) importFrom(zoo,as.yearmon) importFrom(zoo,as.yearqtr) importFrom(zoo,na.approx)
 ##' @title Calculate deltaDTD on any time scale ##' ##' @description A function that calculates DTD, SD, G, and deltaDTD on daily, monthly, seasonal, or annual time scales. It automatically eliminates aggregate values with higher than 20\% missing values. This function requres a datain frame with dates in column 1 called 'Date', Tmax in column 2 called 'MaxTemp', Tmin in column 3 called 'MinTemp'. No other datain is necessary Note that the datain aquired though the \code{canadaHCD} package meets these needs. ##' ##' @param datain datain.frame; define the datain that should be analyzed ##' @param period character; The period for the output datain. One of "daily", "monthly", "seasonal", or "annual". ##' @param max_NA numeric; The maximum proportion of missing data to allow (e.g. 0.2 for 20\% missing values) ##' ##' @author Conor I. Anderson ##' ##' @importFrom zoo as.yearmon as.yearqtr ##' @importFrom stats aggregate sd ##' @importFrom dplyr mutate group_by group_indices select summarize ##' ##' @export ##' ##' @examples ##' \dontrun{deltaDTD(tor_dly, "annual")} deltaDTD <- function(datain, period = "z", max_NA = 0.2) { if(period != "annual" & period != "seasonal" & period != "monthly" & period != "daily") { stop("I don't recognize the period you specified.") } # Calculate the deltaDTD value. dat <- mutate(datain, DTD_Tmax = abs(datain$MaxTemp - c(NA, datain$MaxTemp[1:(nrow(datain)-1)])), DTD_Tmin = abs(datain$MinTemp - c(NA, datain$MinTemp[1:(nrow(datain)-1)])), deltaDTD = round(DTD_Tmax - DTD_Tmin, 3)) # Drop the data we don't need form the table. dat <- select(dat, Date:MinTemp, DTD_Tmax:deltaDTD) ## Stop here for daily data if(period == "daily") return(dat) # Calculate number of missing values tests <- dat %>% group_by(Yearmon = as.yearmon(Date)) %>% summarize(Missing = (sum(is.na(deltaDTD))/sum(is.na(deltaDTD), !is.na(deltaDTD)))) # Take note of those months that are missing more than 20% of the data badrows <- which(tests$Missing > max_NA) if(period == "monthly"){ # Now we stick everything together in a new data frame. dat <- dat %>% group_by(Yearmon = as.yearmon(Date)) %>% summarize(Tmax = mean(MaxTemp, na.rm = TRUE), Tmin = mean(MinTemp, na.rm = TRUE), DTD_Tmax = mean(DTD_Tmax, na.rm = TRUE), DTD_Tmin = mean(DTD_Tmin, na.rm = TRUE), deltaDTD = mean(deltaDTD, na.rm = TRUE), SD_Tmax = sd(MaxTemp, na.rm = TRUE), SD_Tmin = sd(MinTemp, na.rm = TRUE)) # Wipe out the months with too much missing data. dat[badrows,2:ncol(dat)] <- NA # Calculate G value and deltaDTD dat <- mutate(dat, G_Tmax = DTD_Tmax / SD_Tmax, G_Tmin = DTD_Tmin / SD_Tmin) ## Stop here for monthly data if (period == "monthly") return(dat) } else { # Wipe out the bad months dat[!is.na(match(dat %>% group_indices(Yearmon = as.yearmon(Date)), badrows)),2:6] <- NA dat <- mutate(dat, Year = as.integer(format(Date, format = "%Y")), Month = as.integer(format(Date, format = "%m"))) dat$Year[dat$Month == 12] <- dat$Year[dat$Month == 12] + 1 dat$Season[dat$Month == 1 | dat$Month == 2 | dat$Month == 12] <- 1 dat$Season[dat$Month == 3 | dat$Month == 4 | dat$Month == 5] <- 2 dat$Season[dat$Month == 6 | dat$Month == 7 | dat$Month == 8] <- 3 dat$Season[dat$Month == 9 | dat$Month == 10 | dat$Month == 11] <- 4 tests <- dat %>% group_by(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))) %>% summarize(Total = sum(is.na(deltaDTD), !is.na(deltaDTD)), Missing = (sum(is.na(deltaDTD))/Total)) badrows <- c(which(tests$Total < 90), which(tests$Missing > max_NA)) ## Take this route for seasonal data if (period == "seasonal") { # Now we stick everything together in a new data frame. dat <- dat %>% group_by(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))) %>% summarize(Tmax = mean(MaxTemp, na.rm = TRUE), Tmin = mean(MinTemp, na.rm = TRUE), DTD_Tmax = mean(DTD_Tmax, na.rm = TRUE), DTD_Tmin = mean(DTD_Tmin, na.rm = TRUE), deltaDTD = mean(deltaDTD, na.rm = TRUE), SD_Tmax = sd(MaxTemp, na.rm = TRUE), SD_Tmin = sd(MinTemp, na.rm = TRUE)) # Wipe out the months with too much missing data. dat[badrows,2:ncol(dat)] <- NA # Calculate G value dat <- mutate(dat, G_Tmax = DTD_Tmax / SD_Tmax, G_Tmin = DTD_Tmin / SD_Tmin) ## Stop here for seasonal data return(dat) } else { # Wipe out the bad quarters FIXME: Is there a way to vectorize this? dat[!is.na(match(dat %>% group_indices(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))), badrows)),2:6] <- NA # Take this route for annual data tests <- dat %>% group_by(Year = format(Date, format = "%Y")) %>% summarize(Missing = (sum(is.na(deltaDTD))/sum(is.na(deltaDTD), !is.na(deltaDTD)))) badrows <- which(tests$Missing > max_NA) # Now we stick everything together in a new data frame. dat <- dat %>% group_by(Year = format(Date, format = "%Y")) %>% summarize(Tmax = mean(MaxTemp, na.rm = TRUE), Tmin = mean(MinTemp, na.rm = TRUE), DTD_Tmax = mean(DTD_Tmax, na.rm = TRUE), DTD_Tmin = mean(DTD_Tmin, na.rm = TRUE), deltaDTD = mean(deltaDTD, na.rm = TRUE), SD_Tmax = sd(MaxTemp, na.rm = TRUE), SD_Tmin = sd(MinTemp, na.rm = TRUE)) # Wipe out the months with too much missing data. dat[badrows,2:ncol(dat)] <- NA # Calculate G value and deltaDTD dat <- mutate(dat, G_Tmax = DTD_Tmax / SD_Tmax, G_Tmin = DTD_Tmin / SD_Tmin) ## Stop here for annual data return(dat) } } }