Commit 1f6bb16d authored by Dan Baston's avatar Dan Baston

Add script to download and aggregate CPC global daily temp

parent f130ce54
#!/usr/bin/env Rscript
# Copyright (c) 2019 ISciences, LLC.
# All rights reserved.
# WSIM is licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License. You may
# obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
Download CPC Global Daily Temperature data and compute monthly means
Usage: get_cpc_monthly_mean_temperature.R (--workdir=<dirname> --yearmon=<yearmon> --output=<file>)
--yearmon <yearmon> Year and month of data to read
--workdir <dirname> Path of working directory where raw files are stored
--output <file> Path of output netCDF file
byte_offset <- function(day, pixel=1) {
sz <- 4
nx <- 720
ny <- 360
nvars <- 4
((day-1)*nx*ny*nvars + (pixel - 1))*sz
day_number <- function(year, month, day) {
as.integer(strftime(ISOdate(year, month, day), '%j'))
#' Download days from \code{first_day} to \code{last_day} to a temporary file, and return that file
download_days <- function(url, first_day, last_day) {
start <- byte_offset(first_day)
stop <- byte_offset(last_day + 1) - 1
fname <- tempfile()
expected_size <- (stop - start) + 1
infof('Downloading %d bytes (%d - %d) from %s', expected_size, start, stop, url)
args=c('-r', sprintf('%d-%d', start, stop),
'-o', fname,
# We can't check curl return code because it may download all of the
# data and then time out while closing the connection. So we call it
# a success if we got as many bytes as we asked for.
received_size <- file.size(fname)
if (received_size != expected_size) {
stop(sprintf('Failed to download bytes %d-%d from %s (received %d bytes)', start, stop, url, received_size))
#' Check whether a date is available in an uncompressed file
is_date_available <- function(url, year, month, day) {
pixel <- 98200
start <- byte_offset(day_number(year, month, day), pixel)
stop <- start + 3
fname <- tempfile()
args=c('-r', sprintf('%d-%d', start, stop),
'--max-time', 3,
'-o', fname,
result <- isTRUE(readBin(fname, 'numeric', n=1, size=4, endian='little') != -999)
main <- function(raw_args) {
args <- parse_args(usage, raw_args)
year <- as.integer(substr(args$yearmon, 1, 4))
month <- as.integer(substr(args$yearmon, 5, 6))
outfile <- args$output
workdir <- args$workdir
if (!can_write(outfile)) {
stop(sprintf('Cannot write to %s. Does the directory exist?', outfile))
if (!dir.exists(workdir)) {
dir.create(workdir, recursive=TRUE)
start_doy <- day_number(year, month, 1)
end_doy <- day_number(year, month, days_in_month(ISOdate(year, month, 1)))
file_url <- sprintf('', year)
file_name <- sprintf('CPC_GLOBAL_T_V0.x_0.5deg.lnx.%04d.gz', year)
fpath <- file.path(workdir, file_name)
# We have to go through a few hoops to find the data. For a completed year, the data will probably
# be found in gzipped format. For the most recent year, the data will probably be found uncompressed.
# Since we have no way to know if the files have been gzipped yet, we have to check for both
# cases.
found_gzfile <- file.exists(fpath)
if (!found_gzfile) {
infof('Attempting to download compressed data for %s', year)
# No need to panic if the file doesn't exist, so squelch any noisy output.
found_gzfile <- (download.file(file_url, fpath, quiet=TRUE) == 0)
}, silent=TRUE)
if (!found_gzfile) {
infof('Compressed data not found for %s. Checking for uncompressed data.', year)
# Now things get weird. If the uncompressed file is found, it may or may not include data for
# the month of interest. But we can't determine this from the file size alone; if the data is
# not yet available, the file will still be present and populated with NODATA value. So we
# carefully construct an FTP request for a single byte, representing a pixel that should have
# data, on the last day of the month.
uncompressed_url <- sub('.gz', '', file_url, fixed=TRUE)
last_day_in_month <- days_in_month(ISOdate(year, month, 1))
if (is_date_available(uncompressed_url, year, month, last_day_in_month)) {
infof('Data is available for %04d-%02d-%02d. Downloading.', year, month, last_day_in_month)
# It looks like the requested data is available, so we now download all dates of interest
# to a temporary file.
fpath <- download_days(uncompressed_url, start_doy, end_doy)
end_doy <- (end_doy - start_doy) + 1
start_doy <- 1
} else {
stop(sprintf('Data not yet available for %04d-%02d-%02d', year, month, last_day_in_month))
infof('Extracting data for %04d%02d (days %d-%d) from %s', year, month, start_doy, end_doy, fpath)
tmin <- read_noaa_cpc_global_daily_temp(fpath, 'tmin', start_doy, end_doy)
tmax <- read_noaa_cpc_global_daily_temp(fpath, 'tmax', start_doy, end_doy)
tavg <- 0.5*(tmin + tmax)
vars = list(
tmin = wsim.distributions::stack_mean(tmin),
tmax = wsim.distributions::stack_mean(tmax),
tavg = wsim.distributions::stack_mean(tavg)),
filename = outfile,
extent = c(-180, 180, -90, 90),
attrs = list(
list(var='tmin', key='units', val='degree_Celsius'),
list(var='tmax', key='units', val='degree_Celsius'),
list(var='tavg', key='units', val='degree_Celsius')))
infof('Wrote min, max, and avg temperatures for %04d%02d to %s', year, month, outfile)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment