Commit df4f5e45 authored by Conor Anderson's avatar Conor Anderson

Merge branch 'devel' into crop

parents d20e129b 24bd33a3
#### -- Packrat Autoloader (version 0.4.8-1) -- ####
source("packrat/init.R")
#### -- End Packrat Autoloader -- ####
# Ensure stringi is available at startup; install it on first run if missing.
# NOTE(review): requireNamespace() returns FALSE (rather than erroring) when
# the package is absent, which is why it is used here instead of library().
if (!requireNamespace("stringi")) install.packages("stringi")
.cache/*
.Rhistory
.Rproj.user
deploy.sh
local_settings.R
packrat/lib*/
packrat/src/
plumber_settings.R
shiny/shiny.Rproj
updateip.sh
get_choices <- function(file_dir, var, lim = FALSE) {
  # Enumerate the verified NetCDF files for `var` under `file_dir` and parse
  # their CMIP5-style filenames into a table of selectable choices.
  #
  # file_dir: root data directory (contains <var>/verified/ subfolders)
  # var:      variable name, e.g. "tas" or "pr"
  # lim:      if TRUE, keep only files spanning 200601-210012
  #
  # Returns a tibble with one row per file and the columns Variable, Period,
  # Model, Scenario, Ensemble, Range, Start, End, Filenames.
  if (debug_flag) message(sprintf("Getting choices for %s in %s", var, file_dir))
  available <- dir(file.path(file_dir, var, "verified"))
  filenames <- if (lim) grep("200601-210012.nc$", available, value = TRUE) else available
  parts <- t(sapply(filenames, ncdf4.helpers::get.split.filename.cmip5))
  choices <- as_tibble(parts)
  names(choices) <- c("Variable", "Period", "Model", "Scenario", "Ensemble", "Range", "Start", "End")
  # Restore dots in model names ("CM2-1" -> "CM2.1"); the substitution is run
  # twice so that overlapping digit-dash-digit runs (e.g. "2-5-1") are all
  # converted. Scenario codes like "rcp45" become "RCP4.5".
  choices %>%
    mutate(Model = gsub("([0-9])-([0-9])", "\\1.\\2", gsub("([0-9])-([0-9])", "\\1.\\2", Model)),
           Scenario = sub("rcp([0-9])([0-9])", "RCP\\1.\\2", Scenario),
           Filenames = filenames)
}
# Assemble a monthly time-series tibble for every GCM file referenced in
# `meta`. Each row of `meta` carries Var, Model, Scenario, Ensemble plus
# list-columns Files, Lat_Cell and Lon_Cell; files belonging to one row are
# read in sequence and stitched into a single continuous series.
# Results are cached per file+cell in a storr RDS store, and rows of `meta`
# are processed in parallel with foreach/%dopar%.
# NOTE(review): relies on globals debug_flag, file_dir, no_of_cores and
# cache_ver, and on a Shiny session (withProgress/incProgress) — confirm
# these are in scope wherever this is sourced.
get_gcm_ts <- function(meta) {
if (debug_flag) {
message("get_gcm_ts")
message(sprintf("Reading files from %s on %i cores with a cache date of %s",
file_dir, no_of_cores, cache_ver))
}
# Per-file+cell cache, keyed below by "<filename>_<lat>_<lon>".
st_point <- storr::storr_rds(".cache/point-cache", default_namespace = "point")
# Process each NetCDF file
message("Collecting the NetCDF time series...")
registerDoParallel(no_of_cores)
withProgress(message = 'Getting time series in parallel', value = 0, {
dat <- foreach(row=1:nrow(meta), .combine=bind_rows) %dopar% {
var <- meta$Var[row]
lat_cell <- meta$Lat_Cell[row]
lon_cell <- meta$Lon_Cell[row]
fs <- unlist(meta$Files[row])
for (f in 1:length(fs)) {
key <- paste(fs[f], lat_cell, lon_cell, sep = "_")
time_tmp <- NULL
if (st_point$exists(key)) {
message("Hit the cache.")
time_tmp <- st_point$get(key)
## Invalidate old cache entries.
# Entries written under a different cache_ver (or with no version tag at
# all) are deleted and re-read from the NetCDF source below.
if (is.null(attr(time_tmp, "cache_ver")) || attr(time_tmp, "cache_ver") != cache_ver) {
message("Cached version is invalid. Deleting.")
st_point$del(key)
time_tmp <- NULL
}
}
if (is.null(time_tmp)) {
message("Missed the cache")
# Get the time
components <- get.split.filename.cmip5(fs[f])
nc_nc <- nc_open(file.path(file_dir, var, "verified", fs[f]))
nc_time <- try(nc.get.time.series(nc_nc, v = var, time.dim.name = "time"), silent = TRUE)
if (inherits(nc_time, "try-error")) {
# Fallback when nc.get.time.series cannot decode the calendar: decode the
# raw time axis manually from its "units" attribute (e.g.
# "days since YYYY-MM-DD"; the origin is the third whitespace token).
nc_time <- ncvar_get(nc_nc, "time")
nc_time_units <- ncatt_get(nc_nc, "time", attname="units")$value
nc_time_origin <- strsplit(nc_time_units, " ")
nc_time_origin <- unlist(nc_time_origin)[3]
nc_time <- as.yearmon(ymd(nc_time_origin) + nc_time)
# Sanity check: the decoded first/last months must match the filename's
# declared tstart/tend, otherwise the manual decoding went wrong.
if (paste0(format(nc_time[1], format = "%Y"), format(nc_time[1], format = "%m")) != unname(components['tstart']) || paste0(format(nc_time[length(nc_time)], format = "%Y"), format(nc_time[length(nc_time)], format = "%m")) != unname(components['tend'])) {
stop(paste("Error processing time for", fs[f]))
}
} else {
nc_time <- as.yearmon(format(nc_time, format = "%Y-%m-%d hh:mm:ss"))
}
# Now load only that grid data
nc_var <- nc.get.var.subset.by.axes(nc_nc, var, axis.indices = list(X = lon_cell, Y = lat_cell))
# Close the nc connection
nc_close(nc_nc); rm(nc_nc)
# Note, does this still apply?
# If the cell selection returned more than one lat/lon cell, average across
# cells, leaving only the time dimension.
# NOTE(review): "latitud" below is a typo in a user-visible warning; left
# untouched here because this edit changes comments only.
if (dim(nc_var)[1] > 1 || dim(nc_var)[2] > 1) {
warning("We got two latitud or longitude cells. We'll average across them.")
nc_var <- apply(nc_var, 3, mean)
} else {
nc_var <- nc_var[1, 1, ]
}
time_tmp <- tibble(Var = var,
Model = meta$Model[row],
Scenario = meta$Scenario[row],
Ensemble = meta$Ensemble[row],
Time = nc_time,
Year = format(as.yearmon(Time), format = "%Y"),
Month = format(as.yearmon(Time), format = "%m"),
Value = nc_var)
rm(nc_var, nc_time); gc()
# Tag with the current cache version so stale entries can be detected above.
attr(time_tmp, "cache_ver") <- cache_ver
st_point$set(key, time_tmp)
message("Cached data")
}
# Stitch consecutive files together: append only the months strictly after
# the last month already in time_series (yearmon steps are 1/12).
if (f == 1) {
time_series <- time_tmp
} else {
start_row <- which(time_tmp$Time == (time_series$Time[nrow(time_series)] + 1/12))
if (length(start_row) != 0) time_series <- suppressWarnings(bind_rows(time_series, time_tmp[start_row:nrow(time_tmp),]))
}
}
incProgress((nrow(meta)/no_of_cores)^-1, detail = paste(row, "of", nrow(meta)))
time_series$Scenario <- meta$Scenario[row] # This needs to be at the end so we don't override the other files.
time_series
}
})
message("Done collecting the NetCDF files...")
# Return our tibble
dat %>% distinct() ##FIXME: Why do I get so many dupes?
}
\ No newline at end of file
lookup_table_bounds <- function(choices) {
  # Build a per-model table of grid-cell boundary coordinates for the
  # variable described by `choices`.
  #
  # choices: tibble produced by get_choices() (columns Variable, Model,
  #          Filenames, ...).
  # Returns the one-row-per-model choices joined with list-columns
  # Lat_Bot/Lat_Top/Lon_Lef/Lon_Rig of cell bounds.
  var <- unique(choices$Variable)
  ## As far as I can tell, the model bounds never change, regardless of scenario, experiment etc. So we just need one file for each
  # Note, we could confirm this if it weren't for https://github.com/tidyverse/dplyr/issues/3088
  choices <- choices %>% group_by(Model) %>% summarize(Filename = head(Filenames, 1))
  # Read the lat/lon cell bounds from a single NetCDF file. Some files name
  # the bounds variables "lon_bnds"/"lat_bnds", others "lon_bounds"/
  # "lat_bounds", so try the common spelling first and fall back.
  get_bounds <- function(file_dir, var, x) {
    nc_nc <- nc_open(file.path(file_dir, var, "verified", x))
    # FIX: the connection was previously never closed, leaking one NetCDF
    # file handle per model. on.exit() guarantees closure even if a
    # ncvar_get() call errors.
    on.exit(nc_close(nc_nc), add = TRUE)
    lon_bnds <- try(ncvar_get(nc_nc, "lon_bnds"), silent = TRUE)
    if (inherits(lon_bnds, "try-error")) {
      lon_bnds <- ncvar_get(nc_nc, "lon_bounds")
    }
    lat_bnds <- try(ncvar_get(nc_nc, "lat_bnds"), silent = TRUE)
    if (inherits(lat_bnds, "try-error")) {
      lat_bnds <- ncvar_get(nc_nc, "lat_bounds")
    }
    c(Lat_Bot = list(lat_bnds[1, ]), Lat_Top = list(lat_bnds[2, ]),
      Lon_Lef = list(lon_bnds[1, ]), Lon_Rig = list(lon_bnds[2, ]))
  }
  bounds <- choices$Filename %>%
    map(~get_bounds(file_dir, var, .)) %>%
    map_df(~tibble(Lat_Bot = .[1], Lat_Top = .[2], Lon_Lef = .[3], Lon_Rig = .[4]))
  bind_cols(choices %>% dplyr::select(-`Filename`), bounds)
}
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
StripTrailingWhitespace: Yes
version: '2'
services:
conjuntool:
build: ./shiny
restart: always
volumes:
- ./shiny/local_settings.R:/srv/shiny-server/app/local_settings.R:ro
- ./shiny/shiny-server.conf:/etc/shiny-server/shiny-server.conf:ro
expose:
- 80
plumber:
build: ./plumber
restart: always
volumes:
- /data/climate_data:/data/climate_data
- ./plumber/plumber_settings.R:/plumber_settings.R:ro
lb:
image: 'dockercloud/haproxy:1.2.1'
restart: always
links:
- plumber:8000
volumes:
- /var/run/docker.sock:/var/run/docker.sock
haproxy:
image: eeacms/haproxy
restart: always
depends_on:
- conjuntool
volumes:
- ./configure.py:/configure.py
environment:
FRONTEND_PORT: "80"
BACKENDS: "conjuntool"
BACKENDS_PORT: "3838"
DNS_ENABLED: "true"
LOG_LEVEL: "info"
COOKIES_ENABLED: "true"
INTER: "10s"
FALL: "5"
expose:
- 80
## ------------------------------------------------------------------------
## Packrat bootstrap (auto-generated by packrat 0.4.8-1; vendored code).
## Finds packrat in the project-private library, or bootstraps an install
## from a local source tarball / user library / devtools / CRAN, and then
## enters packrat mode. NOTE(review): do not hand-edit this block;
## regenerate it with packrat if it ever needs updating.
## ------------------------------------------------------------------------
local({
## Helper function to get the path to the library directory for a
## given packrat project.
getPackratLibDir <- function(projDir = NULL) {
path <- file.path("packrat", "lib", R.version$platform, getRversion())
if (!is.null(projDir)) {
## Strip trailing slashes if necessary
projDir <- sub("/+$", "", projDir)
## Only prepend path if different from current working dir
if (!identical(normalizePath(projDir), normalizePath(getwd())))
path <- file.path(projDir, path)
}
path
}
## Ensure that we set the packrat library directory relative to the
## project directory. Normally, this should be the working directory,
## but we also use '.rs.getProjectDirectory()' if necessary (e.g. we're
## rebuilding a project while within a separate directory)
libDir <- if (exists(".rs.getProjectDirectory"))
getPackratLibDir(.rs.getProjectDirectory())
else
getPackratLibDir()
## Unload packrat in case it's loaded -- this ensures packrat _must_ be
## loaded from the private library. Note that `requireNamespace` will
## succeed if the package is already loaded, regardless of lib.loc!
if ("packrat" %in% loadedNamespaces())
try(unloadNamespace("packrat"), silent = TRUE)
## Happy path: packrat already installed in the private library.
if (suppressWarnings(requireNamespace("packrat", quietly = TRUE, lib.loc = libDir))) {
# Check 'print.banner.on.startup' -- when NA and RStudio, don't print
print.banner <- packrat::get_opts("print.banner.on.startup")
if (print.banner == "auto" && is.na(Sys.getenv("RSTUDIO", unset = NA))) {
print.banner <- TRUE
} else {
print.banner <- FALSE
}
return(packrat::on(print.banner = print.banner))
}
## Escape hatch to allow RStudio to handle bootstrapping. This
## enables RStudio to provide print output when automagically
## restoring a project from a bundle on load.
if (!is.na(Sys.getenv("RSTUDIO", unset = NA)) &&
is.na(Sys.getenv("RSTUDIO_PACKRAT_BOOTSTRAP", unset = NA))) {
Sys.setenv("RSTUDIO_PACKRAT_BOOTSTRAP" = "1")
setHook("rstudio.sessionInit", function(...) {
# Ensure that, on sourcing 'packrat/init.R', we are
# within the project root directory
if (exists(".rs.getProjectDirectory")) {
owd <- getwd()
setwd(.rs.getProjectDirectory())
on.exit(setwd(owd), add = TRUE)
}
source("packrat/init.R")
})
return(invisible(NULL))
}
## Bootstrapping -- only performed in interactive contexts,
## or when explicitly asked for on the command line
if (interactive() || "--bootstrap-packrat" %in% commandArgs(TRUE)) {
message("Packrat is not installed in the local library -- ",
"attempting to bootstrap an installation...")
## We need utils for the following to succeed -- there are calls to functions
## in 'restore' that are contained within utils. utils gets loaded at the
## end of start-up anyhow, so this should be fine
library("utils", character.only = TRUE)
## Install packrat into local project library
packratSrcPath <- list.files(full.names = TRUE,
file.path("packrat", "src", "packrat")
)
## No packrat tarballs available locally -- try some other means of installation
if (!length(packratSrcPath)) {
message("> No source tarball of packrat available locally")
## There are no packrat sources available -- try using a version of
## packrat installed in the user library to bootstrap
if (requireNamespace("packrat", quietly = TRUE) && packageVersion("packrat") >= "0.2.0.99") {
message("> Using user-library packrat (",
packageVersion("packrat"),
") to bootstrap this project")
}
## Couldn't find a user-local packrat -- try finding and using devtools
## to install
else if (requireNamespace("devtools", quietly = TRUE)) {
message("> Attempting to use devtools::install_github to install ",
"a temporary version of packrat")
library(stats) ## for setNames
devtools::install_github("rstudio/packrat")
}
## Try downloading packrat from CRAN if available
else if ("packrat" %in% rownames(available.packages())) {
message("> Installing packrat from CRAN")
install.packages("packrat")
}
## Fail -- couldn't find an appropriate means of installing packrat
else {
stop("Could not automatically bootstrap packrat -- try running ",
"\"'install.packages('devtools'); devtools::install_github('rstudio/packrat')\"",
"and restarting R to bootstrap packrat.")
}
# Restore the project, unload the temporary packrat, and load the private packrat
packrat::restore(prompt = FALSE, restart = TRUE)
## This code path only reached if we didn't restart earlier
unloadNamespace("packrat")
requireNamespace("packrat", lib.loc = libDir, quietly = TRUE)
return(packrat::on())
}
## Multiple packrat tarballs available locally -- try to choose one
## TODO: read lock file and infer most appropriate from there; low priority because
## after bootstrapping packrat a restore should do the right thing
if (length(packratSrcPath) > 1) {
warning("Multiple versions of packrat available in the source directory;",
"using packrat source:\n- ", shQuote(packratSrcPath))
packratSrcPath <- packratSrcPath[[1]]
}
lib <- file.path("packrat", "lib", R.version$platform, getRversion())
if (!file.exists(lib)) {
dir.create(lib, recursive = TRUE)
}
lib <- normalizePath(lib, winslash = "/")
message("> Installing packrat into project private library:")
message("- ", shQuote(lib))
surround <- function(x, with) {
if (!length(x)) return(character())
paste0(with, x, with)
}
## The following is performed because a regular install.packages call can fail
peq <- function(x, y) paste(x, y, sep = " = ")
installArgs <- c(
peq("pkgs", surround(packratSrcPath, with = "'")),
peq("lib", surround(lib, with = "'")),
peq("repos", "NULL"),
peq("type", surround("source", with = "'"))
)
## Build and run an install.packages() call in a separate vanilla R process.
installCmd <- paste(sep = "",
"utils::install.packages(",
paste(installArgs, collapse = ", "),
")")
fullCmd <- paste(
surround(file.path(R.home("bin"), "R"), with = "\""),
"--vanilla",
"--slave",
"-e",
surround(installCmd, with = "\"")
)
system(fullCmd)
## Tag the installed packrat so we know it's managed by packrat
## TODO: should this be taking information from the lockfile? this is a bit awkward
## because we're taking an un-annotated packrat source tarball and simply assuming it's now
## an 'installed from source' version
## -- InstallAgent -- ##
installAgent <- 'InstallAgent: packrat 0.4.8-1'
## -- InstallSource -- ##
installSource <- 'InstallSource: source'
packratDescPath <- file.path(lib, "packrat", "DESCRIPTION")
DESCRIPTION <- readLines(packratDescPath)
DESCRIPTION <- c(DESCRIPTION, installAgent, installSource)
cat(DESCRIPTION, file = packratDescPath, sep = "\n")
# Otherwise, continue on as normal
message("> Attaching packrat")
library("packrat", character.only = TRUE, lib.loc = lib)
message("> Restoring library")
restore(restart = FALSE)
# If the environment allows us to restart, do so with a call to restore
restart <- getOption("restart")
if (!is.null(restart)) {
message("> Packrat bootstrap successfully completed. ",
"Restarting R and entering packrat mode...")
return(restart())
}
# Callers (source-erers) can define this hidden variable to make sure we don't enter packrat mode
# Primarily useful for testing
if (!exists(".__DONT_ENTER_PACKRAT_MODE__.") && interactive()) {
message("> Packrat bootstrap successfully completed. Entering packrat mode...")
packrat::on()
}
Sys.unsetenv("RSTUDIO_PACKRAT_BOOTSTRAP")
}
})
This diff is collapsed.
auto.snapshot: TRUE
use.cache: TRUE
print.banner.on.startup: auto
vcs.ignore.lib: TRUE
vcs.ignore.src: TRUE
external.packages:
local.repos:
load.external.packages.on.startup: TRUE
ignored.packages:
quiet.package.installation: TRUE
snapshot.recommended.packages: FALSE
snapshot.fields:
Imports
Depends
LinkingTo
# Plumber API image for Conjuntool: serves the NetCDF time-series endpoints.
FROM trestletech/plumber
MAINTAINER Conor Anderson <[email protected]>
COPY api/plumber.R /api/plumber.R
# NOTE(review): overwriting sources.list pins apt to Debian bullseye "main"
# only -- security/updates repos are dropped; confirm this is intentional.
RUN echo 'deb http://deb.debian.org/debian bullseye main' > /etc/apt/sources.list
RUN apt-get update &&\
apt-get install -y --no-install-recommends curl libnetcdf-dev &&\
apt-get clean && rm -rf /tmp/* /var/lib/apt/lists/*
# Install R dependencies: ncdf4.helpers from GitHub plus the project's
# dependency-bootstrap snippet hosted on GitLab.
RUN cd /api &&\
Rscript -e "source('https://install-github.me/pacificclimate/ncdf4.helpers')" &&\
Rscript -e "source('https://gitlab.com/ConorIA/conjuntool/snippets/1788463/raw')" &&\
rm -rf /tmp/*
CMD ["/api/plumber.R"]
This diff is collapsed.
# Debug options
# When TRUE, the plumber/server functions emit verbose progress messages.
# FIX: use `<-` for top-level assignment throughout (idiomatic R; `=` is
# reserved for argument matching).
debug_flag <- FALSE
# File storage location
# File tree should look like this:
#
# <file_dir>
# └── <cmip> (e.g. "CMIP5", "CMIP6")
# └── <var> (e.g. "tas", "pr")
# └── verified
# └── <filename>
#
file_dir <- "/data/climate_data"
# Cache details: one storr RDS store per cache type, rooted under the data
# directory so cached results persist across container restarts.
cache_root <- file.path(file_dir, ".cache")
st_avg <- storr::storr_rds(file.path(cache_root, "avg-cache"), default_namespace = "avg")
st_point <- storr::storr_rds(file.path(cache_root, "point-cache"), default_namespace = "point")
st_meta <- storr::storr_rds(file.path(cache_root, "meta-cache"), default_namespace = "meta")
# Bump this date string to invalidate all previously cached time series.
cache_ver <- "2018-12-06"
# Users and Passwords (empty defaults; presumably populated in the deployed
# copy of this settings file -- TODO confirm)
users <- c()
passwords <- c()
aur/icu57
aur/udunits
community/netcdf
community/netcdf-fortran
community/gdal
Dockerfile
local_settings*
*.Rproj
# Shiny front-end image for Conjuntool.
FROM rocker/shiny
MAINTAINER Conor Anderson <[email protected]>
COPY . /srv/shiny-server/app
RUN apt-get update &&\
apt-get install -y --no-install-recommends curl libgdal-dev libjq-dev libnetcdf-dev libssl-dev libudunits2-dev &&\
apt-get clean && rm -rf /tmp/* /var/lib/apt/lists/*
RUN mv /srv/shiny-server/app/shiny-server.conf /etc/shiny-server/shiny-server.conf
# Install R package dependencies. phantomjs is installed as the `shiny`
# user so webshot can locate it under that user's home at runtime.
RUN cd /srv/shiny-server/app &&\
Rscript -e "install.packages(\"devtools\", repos = \"https://cloud.r-project.org/\")" &&\
Rscript -e "source(\"https://gitlab.com/ConorIA/conjuntool/snippets/1788463/raw\")" &&\
Rscript -e "devtools::install_github(\"rstudio/DT\")" &&\
sudo -u shiny bash -c "Rscript -e \"webshot::install_phantomjs()\"" &&\
rm -rf /tmp/*
add_MMM <- function(datin) {
  # Append multi-model-mean ("MMM") rows to a per-model climate table.
  #
  # datin: tibble with at least Var, Model, Scenario, Ensemble plus value
  #        columns; optional Month/Season (extra grouping) and Note columns.
  # Returns `datin` with additional rows whose Model is "MMM", holding the
  # mean of every remaining column across models.
  grouping <- c("Var", "Scenario", "Ensemble")
  if (has_name(datin, "Month")) grouping <- c(grouping, "Month")
  if (has_name(datin, "Season")) grouping <- c(grouping, "Season")
  drop <- "Model"
  if (has_name(datin, "Note")) drop <- c(drop, "Note")
  bind_rows(
    datin,
    # FIX: wrap the external character vector in all_of(); a bare `-drop`
    # is ambiguous tidyselect (it would silently prefer a column literally
    # named "drop") and is deprecated in modern dplyr.
    dplyr::select(datin, -all_of(drop)) %>%
      group_by_at(grouping) %>%
      summarize_all(mean) %>%
      ungroup() %>%  # drop residual grouping before re-binding
      add_column(Model = "MMM", .before = which(names(datin) == "Model"))
  )
}
get_bounds <- function(var) {
  # Fetch the serialized model-bounds table for `var` from the plumber API.
  # Errors (via stop_for_status) on any non-success HTTP response.
  if (debug_flag) message(sprintf("Getting model bounds for %s", var))
  endpoint <- URLencode(paste0(plumber_address, "/bounds?var=", var))
  resp <- POST(endpoint,
               config = list(add_headers(accept = "application/octet-stream")),
               authenticate(plumber_user, plumber_password))
  stop_for_status(resp)
  unserialize(content(resp))
}
\ No newline at end of file
get_cache <- function(key) {
  # Retrieve the serialized R object stored under `key` in the plumber
  # server-side cache. Errors on any non-success HTTP response.
  if (debug_flag) message("Asking plumber for ", key)
  endpoint <- URLencode(paste0(plumber_address, "/cacheget?key=", key))
  resp <- POST(endpoint,
               config = list(add_headers(accept = "application/octet-stream")),
               authenticate(plumber_user, plumber_password))
  stop_for_status(resp)
  unserialize(content(resp))
}
set_cache <- function(key, table) {
  # Ask the plumber API to store `table` (sent as JSON) under `key` in its
  # server-side cache.
  if (debug_flag) message("Asking plumber to save ", key)
  r <- POST(URLencode(paste0(plumber_address, "/cacheset?key=", key)),
            config = list(add_headers(accept = "application/octet-stream")),
            authenticate(plumber_user, plumber_password),
            # FIX: httr::POST()'s argument is `encode`, not `encoding`.
            # "encoding" does not partially match "encode", so the original
            # spelling fell into `...` and never acted as the body-encoding
            # specification.
            body = jsonlite::toJSON(table), encode = "json")
  stop_for_status(r)
  # stop_for_status() already errors on non-2xx responses; this extra check
  # is kept as a belt-and-braces warning for parity with prior behaviour.
  if (r$status_code != 200) warning("There was an error saving this table.")
}
get_choices <- function(var, lim = FALSE) {
  # Ask the plumber API for the table of model/scenario choices for `var`.
  # `lim = TRUE` restricts the listing to files covering 200601-210012.
  if (debug_flag) message(sprintf("Getting choices for %s", var))
  endpoint <- URLencode(paste0(plumber_address, "/choices?var=", var,
                               "&lim=", as.character(lim)))
  resp <- POST(endpoint,
               config = list(add_headers(accept = "application/octet-stream")),
               authenticate(plumber_user, plumber_password))
  stop_for_status(resp)
  unserialize(content(resp))
}
get_gcm_ts <- function(meta) {
if (debug_flag) message("Collecting the NetCDF time series...")
showNotification("We'll collect the necessary files.")
keys <- unlist(apply(meta, 1, function(x) { paste(x$Files, x$Lat_Cell, x$Lon_Cell, sep = "_") }))
scenarios <- unlist(apply(meta, 1, function(x) { rep(x$Scenario, length(x$Files)) }))
urls = paste0(plumber_address, "/timeseries?key=", keys)
reqs <- lapply(urls, function(x) {
HttpRequest$new(
url = x,
opts = list(timeout = 6e4),
auth = auth(plumber_user, plumber_password, "basic")
)$post()
})
res <- AsyncVaried$new(.list = reqs)
res$request()