deltaDTD.R 6.34 KB
Newer Older
Conor Anderson's avatar
Conor Anderson committed
1 2 3 4 5 6 7 8 9 10 11
##' @title Calculate deltaDTD on any time scale
##'
##' @description A function that calculates DTD, SD, G, and deltaDTD on daily, monthly, seasonal, or annual time scales. It automatically eliminates aggregate values with higher than 20\% missing values. This function requres a datain frame with dates in column 1 called 'Date', Tmax in column 2 called 'MaxTemp', Tmin in column 3 called 'MinTemp'. No other datain is necessary Note that the datain aquired though the \code{canadaHCD} package meets these needs.
##'
##' @param datain datain.frame; define the datain that should be analyzed
##' @param period character; The period for the output datain. One of "daily", "monthly", "seasonal", or "annual".
##' @param max_NA numeric; The maximum proportion of missing data to allow (e.g. 0.2 for 20\% missing values)
##'
##' @author Conor I. Anderson
##'
##' @importFrom zoo as.yearmon as.yearqtr
12
##' @importFrom rlang .data
Conor Anderson's avatar
Conor Anderson committed
13
##' @importFrom stats aggregate sd
14
##' @importFrom dplyr %>% mutate group_by group_indices select summarize
Conor Anderson's avatar
Conor Anderson committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
##'
##' @export
##'
##' @examples
##' \dontrun{deltaDTD(tor_dly, "annual")}

deltaDTD <- function(datain, period = "z", max_NA = 0.2) {

  if(period != "annual" & period != "seasonal" & period != "monthly" & period != "daily") {
    stop("I don't recognize the period you specified.")
  }

  # Calculate the deltaDTD value.
  dat <- mutate(datain, DTD_Tmax = abs(datain$MaxTemp - c(NA, datain$MaxTemp[1:(nrow(datain)-1)])), DTD_Tmin = abs(datain$MinTemp - c(NA, datain$MinTemp[1:(nrow(datain)-1)])), deltaDTD = round(DTD_Tmax - DTD_Tmin, 3))

  # Drop the data we don't need form the table.
  dat <- select(dat, Date:MinTemp, DTD_Tmax:deltaDTD)

  ## Stop here for daily data
  if(period == "daily") return(dat)

  # Calculate number of missing values
37 38
  tests <- dat %>% group_by(Yearmon = as.yearmon(.data$Date)) %>%
    summarize(Missing = (sum(is.na(.data$deltaDTD)) / sum(is.na(.data$deltaDTD), !is.na(.data$deltaDTD))))
Conor Anderson's avatar
Conor Anderson committed
39 40 41 42 43 44
  # Take note of those months that are missing more than 20% of the data
  badrows <- which(tests$Missing > max_NA)

  if(period == "monthly"){

    # Now we stick everything together in a new data frame.
45 46 47 48 49 50 51 52
    dat <- dat %>% group_by(Yearmon = as.yearmon(.data$Date)) %>%
      summarize(Tmax = mean(.data$MaxTemp, na.rm = TRUE),
                Tmin = mean(.data$MinTemp, na.rm = TRUE),
                DTD_Tmax = mean(.data$DTD_Tmax, na.rm = TRUE),
                DTD_Tmin = mean(.data$DTD_Tmin, na.rm = TRUE),
                deltaDTD = mean(.data$deltaDTD, na.rm = TRUE),
                SD_Tmax = sd(.data$MaxTemp, na.rm = TRUE),
                SD_Tmin = sd(.data$MinTemp, na.rm = TRUE))
Conor Anderson's avatar
Conor Anderson committed
53 54

    # Wipe out the months with too much missing data.
55
    dat[badrows, 2:ncol(dat)] <- NA
Conor Anderson's avatar
Conor Anderson committed
56 57

    # Calculate G value and deltaDTD
58 59
    dat <- mutate(dat, G_Tmax = .data$DTD_Tmax / .data$SD_Tmax,
                  G_Tmin = .data$DTD_Tmin / .data$SD_Tmin)
Conor Anderson's avatar
Conor Anderson committed
60 61 62 63 64 65 66

    ## Stop here for monthly data
    if (period == "monthly") return(dat)

  } else {

    # Wipe out the bad months
67
    dat[!is.na(match(dat %>% group_indices(Yearmon = as.yearmon(.data$Date)), badrows)),2:6] <- NA
Conor Anderson's avatar
Conor Anderson committed
68

69 70
    dat <- mutate(dat, Year = as.integer(format(.data$Date, format = "%Y")),
                  Month = as.integer(format(.data$Date, format = "%m")))
Conor Anderson's avatar
Conor Anderson committed
71 72 73 74 75 76
    dat$Year[dat$Month == 12] <- dat$Year[dat$Month == 12] + 1
    dat$Season[dat$Month == 1 | dat$Month == 2 | dat$Month == 12] <- 1
    dat$Season[dat$Month == 3 | dat$Month == 4 | dat$Month == 5] <- 2
    dat$Season[dat$Month == 6 | dat$Month == 7 | dat$Month == 8] <- 3
    dat$Season[dat$Month == 9 | dat$Month == 10 | dat$Month == 11] <- 4

77 78 79
    tests <- dat %>% group_by(Yearqtr = as.yearqtr(paste(.data$Year, .data$Season, sep = "-"))) %>%
      summarize(Total = sum(is.na(.data$deltaDTD), !is.na(.data$deltaDTD)),
                Missing = (sum(is.na(.data$deltaDTD)) / .data$Total))
Conor Anderson's avatar
Conor Anderson committed
80 81 82 83 84 85
    badrows <- c(which(tests$Total < 90), which(tests$Missing > max_NA))

    ## Take this route for seasonal data
    if (period == "seasonal") {

      # Now we stick everything together in a new data frame.
86 87 88 89 90 91 92 93
      dat <- dat %>% group_by(Yearqtr = as.yearqtr(paste(Year, Season, sep = "-"))) %>%
        summarize(Tmax = mean(.data$MaxTemp, na.rm = TRUE),
                  Tmin = mean(.data$MinTemp, na.rm = TRUE),
                  DTD_Tmax = mean(.data$DTD_Tmax, na.rm = TRUE),
                  DTD_Tmin = mean(.data$DTD_Tmin, na.rm = TRUE),
                  deltaDTD = mean(.data$deltaDTD, na.rm = TRUE),
                  SD_Tmax = sd(.data$MaxTemp, na.rm = TRUE),
                  SD_Tmin = sd(.data$MinTemp, na.rm = TRUE))
Conor Anderson's avatar
Conor Anderson committed
94 95 96 97 98

      # Wipe out the months with too much missing data.
      dat[badrows,2:ncol(dat)] <- NA

      # Calculate G value
99 100
      dat <- mutate(dat, G_Tmax = .data$DTD_Tmax / .data$SD_Tmax,
                    G_Tmin = .data$DTD_Tmin / .data$SD_Tmin)
Conor Anderson's avatar
Conor Anderson committed
101 102 103 104 105 106 107

      ## Stop here for seasonal data
      return(dat)

    } else {

      # Wipe out the bad quarters FIXME: Is there a way to vectorize this?
108 109 110 111 112
      dat[!is.na(match(dat %>%
                         group_indices(Yearqtr = as.yearqtr(paste(.data$Year,
                                                                  .data$Season,
                                                                  sep = "-"))),
                       badrows)),2:6] <- NA
Conor Anderson's avatar
Conor Anderson committed
113 114 115

      # Take this route for annual data

116 117
      tests <- dat %>% group_by(Year = format(.data$Date, format = "%Y")) %>%
        summarize(Missing = (sum(is.na(.data$deltaDTD)) / sum(is.na(.data$deltaDTD), !is.na(.data$deltaDTD))))
Conor Anderson's avatar
Conor Anderson committed
118 119 120
      badrows <- which(tests$Missing > max_NA)

      # Now we stick everything together in a new data frame.
121 122 123 124 125 126 127 128
      dat <- dat %>% group_by(Year = format(.data$Date, format = "%Y")) %>%
        summarize(Tmax = mean(.data$MaxTemp, na.rm = TRUE),
                  Tmin = mean(.data$MinTemp, na.rm = TRUE),
                  DTD_Tmax = mean(.data$DTD_Tmax, na.rm = TRUE),
                  DTD_Tmin = mean(.data$DTD_Tmin, na.rm = TRUE),
                  deltaDTD = mean(.data$deltaDTD, na.rm = TRUE),
                  SD_Tmax = sd(.data$MaxTemp, na.rm = TRUE),
                  SD_Tmin = sd(.data$MinTemp, na.rm = TRUE))
Conor Anderson's avatar
Conor Anderson committed
129 130 131 132 133

      # Wipe out the months with too much missing data.
      dat[badrows,2:ncol(dat)] <- NA

      # Calculate G value and deltaDTD
134 135
      dat <- mutate(dat, G_Tmax = .data$DTD_Tmax / .data$SD_Tmax,
                    G_Tmin = .data$DTD_Tmin / .data$SD_Tmin)
Conor Anderson's avatar
Conor Anderson committed
136 137 138 139 140 141

      ## Stop here for annual data
      return(dat)
    }
  }
}