Add gridded data functions.

parent 10047379
Pipeline #7230282 failed with stage
Package: claut
Type: Package
Title: Functions from the University of Toronto Climate Lab
Version: 0.1.0
Date: 2016-08-16
Version: 0.1.1
Date: 2017-03-24
Authors@R: c(person(given = c("Conor", "I."), family = "Anderson",
role = c("aut","cre"), email = "conor.anderson@mail.utoronto.ca"),
person(given = c("William", "A."), family = "Gough", role = "ths",
......@@ -14,11 +14,15 @@ Description: Collection of functions developed at the University of
Depends:
R (>= 3.1.0)
Imports:
gdata,
readr,
stats,
tibble,
utils,
zoo
Suggests:
canadaHCD,
tidyverse,
senamhiR
License: GPL (>= 3)
URL: https://gitlab.com/ConorIA/claut
......
......@@ -3,7 +3,17 @@
export(dataEliminator)
export(dataEliminatorMassive)
export(dataEliminatorThorough)
export(generate_wkt_csv)
export(parse_ASCII_grid)
export(trimData)
importFrom(gdata,unmatrix)
importFrom(readr,read_table)
importFrom(readr,write_csv)
importFrom(stats,sd)
importFrom(tibble,tibble)
importFrom(utils,count.fields)
importFrom(utils,read.csv)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(zoo,as.yearmon)
importFrom(zoo,na.approx)
#' @title Generate WKT-formatted polygons for gridded parsed ASCII data
#'
#' @description This function creates a \code{.csv} file from parsed gridded ASCII data, with a properly formatted WKT polygon definition for each grid cell.
#'
#' @details The row and column names of \code{datain} are read as the latitude and longitude of each cell centre. Cell corners are placed 2.5 degrees either side of the centre, i.e. a 5-degree grid is assumed. Each polygon ring is closed (its first corner is repeated as its last), as the WKT format requires.
#'
#' @param datain object; a 3D array of parsed gridded ASCII data generated using \code{\link{parse_ASCII_grid}}
#' @param name character; the name of the matrix (or matrices) to generate \code{.csv} files for, e.g. \code{"Dec 2013"}
#' @param filename character; the name to save the file under; if missing, the (collapsed) \code{name} with a \code{.csv} extension is used
#' @param calc_mean Boolean; if the vector of names has \code{length(name) > 1}, then we will calculate the average for the selected matrices. Selecting more than one matrix with \code{calc_mean = FALSE} is an error.
#' @param remove_missing Boolean; whether we should remove missing values in the mean calculation
#'
#' @return The value returned by \code{write_csv}; the function is called for its side effect of writing \code{filename}.
#'
#' @export
#'
#' @importFrom gdata unmatrix
#' @importFrom tibble tibble
#' @importFrom readr write_csv
#'
#' @examples
#' # Generate an anomaly map for Winter 2013-14
#' \dontrun{generate_wkt_csv(dat, c("Dec 2013", "Jan 2014", "Feb 2014"))}
generate_wkt_csv <- function(datain, name, filename, calc_mean = TRUE, remove_missing = TRUE) {
  # Indices (along the third dimension) of the layers whose names were requested
  layers <- which(unlist(dimnames(datain)[3]) %in% name)
  if (length(layers) == 0L) {
    stop("None of the requested names were found in `datain`: ",
         paste(name, collapse = ", "), call. = FALSE)
  }

  if (length(layers) > 1L) {
    if (!isTRUE(calc_mean)) {
      # unmatrix() only labels a single matrix correctly; a stack of layers
      # would produce cells with missing coordinates.
      stop("More than one matrix was selected; set `calc_mean = TRUE` or ",
           "pass a single `name`.", call. = FALSE)
    }
    averaged <- apply(datain[, , layers], c(1, 2), mean, na.rm = remove_missing)
    unmat <- unmatrix(averaged)
    # mean() of an all-missing cell with na.rm = TRUE gives NaN; report it as NA
    unmat[is.nan(unmat)] <- NA
  } else {
    unmat <- unmatrix(datain[, , layers])
  }

  # unmatrix() names every cell "<row name>:<column name>", i.e. "<lat>:<lon>"
  coords <- strsplit(names(unmat), ":", fixed = TRUE)
  lats <- as.numeric(vapply(coords, `[`, character(1), 1L))
  lons <- as.numeric(vapply(coords, `[`, character(1), 2L))

  # Half the width of a grid cell, in degrees (5-degree grid)
  half_cell <- 2.5
  tr <- paste(lons + half_cell, lats + half_cell)
  tl <- paste(lons - half_cell, lats + half_cell)
  br <- paste(lons + half_cell, lats - half_cell)
  bl <- paste(lons - half_cell, lats - half_cell)
  # A WKT ring must end on the point it started from, hence the repeated `br`
  wkt <- paste0("POLYGON((", paste(br, tr, tl, bl, br, sep = ","), "))")

  if (length(name) > 1) name <- paste(name, collapse = "-")
  if (missing(filename)) filename <- paste0(name, ".csv")
  write_csv(tibble(WKT = wkt, Anomaly = unmat), filename, na = "")
}
#' @title Parse gridded ASCII data
#'
#' @description This function was originally written to read in the NOAA GHCN Merged gridded data set "ncdc-merged-sfc-mntp.dat". It has been written such that it \emph{should} be applicable to other gridded data sets in the ASCII format.
#'
#' @details Rows whose number of fields equals \code{length(separator)} are treated as the label rows that precede each grid. The \code{dimensions[1]} rows following each label row are read as one grid.
#'
#' @param filename character; the name of the ASCII data file
#' @param dimensions integer; optionally specify the dimensions (rows, columns) of each set of data, e.g. \code{c(36, 72)}; if missing, they are derived from the field counts of the file
#' @param lat_range numeric; a vector of length 2 specifying the latitude of the gridded data, defaults to \code{c(-87.5, 87.5)}
#' @param lon_range numeric; a vector of length 2 specifying the longitude of the gridded data, defaults to \code{c(-177.5, 177.5)}
#' @param gridsize numeric; the size of the grids, in degrees, defaults to \code{5}.
#' @param separator character; the format of the row that is used to label each entry, defaults to \code{c("month", "year")}
#' @param start numeric; the first entry in the separator rows, defaults to \code{c(1, 1880)}
#' @param format Boolean; whether the data should be cleaned, defaults to \code{TRUE}
#' @param missing_label character or numeric; the label used to mark missing values (only used if format is \code{TRUE}), defaults to \code{-9999}
#' @param scaling numeric; the value with which values have been scaled (only used if format is \code{TRUE}, and only applied when greater than 1), defaults to \code{100}
#' @param label Boolean; whether the resulting array should have the \code{dimnames} set, defaults to \code{TRUE}
#'
#' @return A three-dimensional array with \code{dimensions[1]} rows, \code{dimensions[2]} columns and one layer per separator row found in the file. If \code{label} is \code{TRUE}, the dimnames are the latitudes, longitudes and entry labels.
#'
#' @importFrom readr read_table
#' @importFrom utils count.fields setTxtProgressBar txtProgressBar
#' @importFrom zoo as.yearmon
#'
#' @export
#'
parse_ASCII_grid <- function(filename, dimensions,
                             lat_range = c(-87.5, 87.5),
                             lon_range = c(-177.5, 177.5),
                             gridsize = 5,
                             separator = c("month", "year"),
                             start = c(1, 1880),
                             format = TRUE, missing_label = -9999,
                             scaling = 100,
                             label = TRUE) {
  fields <- count.fields(filename)
  is_separator <- fields == length(separator)
  daterows <- which(is_separator)
  if (length(daterows) == 0L) {
    stop("No separator rows with ", length(separator),
         " field(s) were found in '", filename, "'.", call. = FALSE)
  }
  if (missing(dimensions)) {
    # Rows: the longest run of consecutive rows of the same kind;
    # columns: the widest row in the file.
    dimensions <- c(max(rle(is_separator)$lengths), max(fields))
  }

  if (label) {
    lons <- seq(lon_range[1], lon_range[2], by = gridsize)
    lats <- rev(seq(lat_range[1], lat_range[2], by = gridsize))
    # Fail before the (slow) parse rather than when the dimnames are assigned
    if (length(lats) != dimensions[1] || length(lons) != dimensions[2]) {
      stop("`lat_range`, `lon_range` and `gridsize` describe a ",
           length(lats), " x ", length(lons), " grid, but the data are ",
           dimensions[1], " x ", dimensions[2], ".", call. = FALSE)
    }
  }

  dataout <- array(NA, dim = c(dimensions[1], dimensions[2], length(daterows)))
  print("Parsing data.")
  prog <- txtProgressBar(min = 0, max = length(daterows), style = 3)
  on.exit(close(prog), add = TRUE)
  for (i in seq_along(daterows)) {
    # Skip up to and including the i-th separator row, then read one grid
    datain <- read_table(filename, col_names = FALSE, skip = daterows[i],
                         n_max = dimensions[1])
    if (format) {
      for (col in seq_len(ncol(datain))) {
        badrows <- which(datain[[col]] == missing_label)
        datain[badrows, col] <- NA
      }
      if (scaling > 1L) datain <- datain / scaling
    }
    setTxtProgressBar(prog, value = i)
    dataout[, , i] <- as.matrix(datain)
  }

  if (label) {
    # Pick the starting value of each date component named in `separator`
    test <- which(separator == "year")
    year <- if (length(test) > 0) start[test] else NULL
    test <- which(separator == "month")
    month <- if (length(test) > 0) sprintf("%02d", start[test]) else NULL
    test <- which(separator == "day")
    day <- if (length(test) > 0) sprintf("%02d", start[test]) else NULL

    if (!is.null(day) && !is.null(month) && !is.null(year)) {
      # Daily data
      startdate <- as.Date(paste(year, month, day, sep = "-"), format = "%Y-%m-%d")
      datelist <- seq(startdate, startdate + length(daterows) - 1, by = 1)
    } else if (is.null(day) && !is.null(month) && !is.null(year)) {
      # Monthly data
      startdate <- as.yearmon(paste(year, month, sep = "-"), format = "%Y-%m")
      datelist <- seq(startdate, startdate + (length(daterows) - 1) / 12, by = 1 / 12)
    } else if (is.null(day) && is.null(month) && !is.null(year)) {
      # Yearly data
      datelist <- seq(year, year + length(daterows) - 1, by = 1)
    } else {
      # No usable date information: number the entries
      datelist <- seq(1, length(daterows), by = 1)
    }
    dimnames(dataout) <- list(as.character(lats), as.character(lons), as.character(datelist))
  }
  dataout
}
......@@ -15,7 +15,7 @@ This package, currently at a very early stage of development, will eventually ho
The following functions were used in studies that are currently under review. If you are a reviewer, this is probably what you are looking for. Note that these functions may undergo some minor optimization or code changes, but the results that they produce will always be the same.
- dataEliminator: This set of functions is used to artificually introduce missing values into monthly climate series. In its current state, there are three variations of the function. These are likely to be refactored.
- dataEliminator: This set of functions is used to artificially introduce missing values into monthly climate series. In its current state, there are three variations of the function. These are likely to be re-factored.
1. [`dataEliminator()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminator.R): The base function that eliminates data for a single year-month of data
2. [`dataEliminatorMassive()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorMassive.R): A helper function that calls the base function multiple times, e.g. 1000 repetitions of the same test
3. [`dataEliminatorThorough()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorThorough.R): A modified version of the base function that performs consecutive elimination of all possible combinations of $k$ consecutive missing values
......@@ -25,6 +25,9 @@ The following functions were used in studies that are currently under review. If
There are some other helper functions in this package that are here in hopes that they prove useful to someone someday. These are:
- [`trimData()`](https://gitlab.com/ConorIA/claut/blob/master/R/trimData.R): An easy function to trim a `data.frame` to given start and end years
- functions for working with ASCII gridded data; these were originally written to parse NOAA's [GHCN Merged gridded data set](https://www.ncdc.noaa.gov/temp-and-precip/ghcn-gridded-products/)
- [`parse_ASCII_grid()`](https://gitlab.com/ConorIA/claut/blob/master/R/parse_ASCII_grid.R): Reads an ASCII file of gridded data into a 3D matrix. This is currently quite slow and a little noisy, but it works.
- [`generate_wkt_csv()`](https://gitlab.com/ConorIA/claut/blob/master/R/generate_wkt_csv.R): This function takes a list of matrix names and generates a `.csv` file of WKT-formatted polygons for import into QGIS and other GIS software.
## Other packages
......
......@@ -11,7 +11,7 @@ Functions used in papers in review
The following functions were used in studies that are currently under review. If you are a reviewer, this is probably what you are looking for. Note that these functions may undergo some minor optimization or code changes, but the results that they produce will always be the same.
- dataEliminator: This set of functions is used to artificually introduce missing values into monthly climate series. In its current state, there are three variations of the function. These are likely to be refactored.
- dataEliminator: This set of functions is used to artificially introduce missing values into monthly climate series. In its current state, there are three variations of the function. These are likely to be re-factored.
1. [`dataEliminator()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminator.R): The base function that eliminates data for a single year-month of data
2. [`dataEliminatorMassive()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorMassive.R): A helper function that calls the base function multiple times, e.g. 1000 repetitions of the same test
3. [`dataEliminatorThorough()`](https://gitlab.com/ConorIA/claut/blob/master/R/dataEliminatorThorough.R): A modified version of the base function that performs consecutive elimination of all possible combinations of *k* consecutive missing values
......@@ -22,6 +22,9 @@ Misc functions
There are some other helper functions in this package that are here in hopes that they prove useful to someone someday. These are:
- [`trimData()`](https://gitlab.com/ConorIA/claut/blob/master/R/trimData.R): An easy function to trim a `data.frame` to given start and end years
- functions for working with ASCII gridded data; these were originally written to parse NOAA's [GHCN Merged gridded data set](https://www.ncdc.noaa.gov/temp-and-precip/ghcn-gridded-products/)
- [`parse_ASCII_grid()`](https://gitlab.com/ConorIA/claut/blob/master/R/parse_ASCII_grid.R): Reads an ASCII file of gridded data into a 3D matrix. This is currently quite slow and a little noisy, but it works.
- [`generate_wkt_csv()`](https://gitlab.com/ConorIA/claut/blob/master/R/generate_wkt_csv.R): This function takes a list of matrix names and generates a `.csv` file of WKT-formatted polygons for import into QGIS and other GIS software.
Other packages
--------------
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate_wkt_csv.R
\name{generate_wkt_csv}
\alias{generate_wkt_csv}
\title{Generate WKT-formatted polygons for gridded parsed ASCII data}
\usage{
generate_wkt_csv(datain, name, filename, calc_mean = TRUE,
remove_missing = TRUE)
}
\arguments{
\item{datain}{object; a matrix of parsed gridded ASCII data generated using \code{\link{parse_ASCII_grid}}}
\item{name}{character; the name of the matrix (or matrices) to generate \code{.csv} files for, e.g. \code{"Dec 2013"}}
\item{filename}{character; the name to save the file under}
\item{calc_mean}{Boolean; if the vector of names has \code{length(name) > 1}, then we will calculate the average for the selected matrices}
\item{remove_missing}{Boolean; whether we should remove missing values in the mean calculation}
}
\description{
This function creates a \code{.csv} file from parsed gridded ASCII data, with a properly formatted WKT polygon definition for each grid.
}
\examples{
# Generate an anomaly map for Winter 2013-14
\dontrun{generate_wkt_csv(dat, c("Dec 2013", "Jan 2014", "Feb 2014"))}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parse_ASCII_grid.R
\name{parse_ASCII_grid}
\alias{parse_ASCII_grid}
\title{Parse gridded ASCII data}
\usage{
parse_ASCII_grid(filename, dimensions, lat_range = c(-87.5, 87.5),
lon_range = c(-177.5, 177.5), gridsize = 5, separator = c("month",
"year"), start = c(1, 1880), format = TRUE, missing_label = -9999,
scaling = 100, label = TRUE)
}
\arguments{
\item{filename}{character; the name of the data text data file}
\item{dimensions}{integer; optionally specify the dimensions of each set of data, e.g. \code{c(36, 72)}}
\item{lat_range}{numeric; a vector of length 2 specifying the latitude of the gridded data, defaults to \code{c(-87.5, 87.5)}}
\item{lon_range}{numeric; a vector of length 2 specifying the longitude of the gridded data, defaults to \code{c(-177.5, 177.5)}}
\item{gridsize}{numeric; the size of the grids, in degrees, defaults to \code{5}.}
\item{separator}{character; the format of the row that is used to label each entry, defaults to \code{c("month", "year")}}
\item{start}{numeric; the first entry in the separator rows, defaults to \code{c(1, 1880)}}
\item{format}{Boolean; whether the data should be cleaned, defaults to \code{TRUE}}
\item{missing_label}{character or numeric; the label used to mark missing values (only used if format is \code{TRUE}), defaults to \code{-9999}}
\item{scaling}{numeric; the value with which values have been scaled (only used if format is \code{TRUE}), defaults to \code{100}}
\item{label}{Boolean; whether the resulting matrix should have the \code{dimnames} set, defaults to \code{TRUE}}
}
\value{
matrix
}
\description{
This function was originally written to read in the NOAA GHCN Merged gridded data set "ncdc-merged-sfc-mntp.dat". It has been written such that it \emph{should} be applicable to other gridded data sets in the ASCII format.
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment