...
 
Commits (3)
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
## R package 'database'. ## R package 'database'.
## Copyright Enrico Schumann 2010-2017 ## Copyright Enrico Schumann 2010-2017
## ---------------- time ## ---------------- time
ttime <- function(x, from = "datetime", to = "numeric", ttime <- function(x, from = "datetime", to = "numeric",
tz = "", strip.attr = TRUE, tz = "", strip.attr = TRUE,
format = "%Y-%m-%d") { format = "%Y-%m-%d") {
if (from == "datetime" && to == "numeric") { if (from == "datetime" && to == "numeric") {
if (strip.attr) if (strip.attr)
c(unclass(x)) else unclass(x) c(unclass(x)) else unclass(x)
...@@ -125,7 +125,6 @@ write_ts_table <- function(ts, dir, file, ...@@ -125,7 +125,6 @@ write_ts_table <- function(ts, dir, file,
## only write if there are rows (ans > 0): ## only write if there are rows (ans > 0):
## e.g., if 'add' was true but no new data were ## e.g., if 'add' was true but no new data were
## found, there is no need to rewrite the table ## found, there is no need to rewrite the table
write.table(as.matrix(data.frame(timestamp, unclass(ts))), write.table(as.matrix(data.frame(timestamp, unclass(ts))),
file = dfile, file = dfile,
row.names = FALSE, row.names = FALSE,
...@@ -138,12 +137,12 @@ write_ts_table <- function(ts, dir, file, ...@@ -138,12 +137,12 @@ write_ts_table <- function(ts, dir, file,
dir <- dbConnect(MonetDBLite::MonetDBLite(), dir) dir <- dbConnect(MonetDBLite::MonetDBLite(), dir)
on.exit(dbDisconnect(dir, shutdown = TRUE)) on.exit(dbDisconnect(dir, shutdown = TRUE))
} }
df <- data.frame(timestamp, unclass(ts)) df <- data.frame(timestamp, unclass(ts))
colnames(df) <- c("timestamp", columns) colnames(df) <- c("timestamp", columns)
dbWriteTable(dir, dbQuoteIdentifier(dir, file), df, dbWriteTable(dir, dbQuoteIdentifier(dir, file), df,
overwrite = overwrite) overwrite = overwrite)
} else } else
stop("unknown backend") stop("unknown backend")
invisible(ans) invisible(ans)
...@@ -158,7 +157,6 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -158,7 +157,6 @@ read_ts_tables <- function(file, dir, t.type = "guess",
backend <- tolower(backend) backend <- tolower(backend)
if (backend == "csv") { if (backend == "csv") {
### **************** ### ****************
...@@ -168,18 +166,18 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -168,18 +166,18 @@ read_ts_tables <- function(file, dir, t.type = "guess",
} else { } else {
dfile <- file.path(dir, file) dfile <- file.path(dir, file)
} }
if (length(dir) != length(file)) { if (length(dir) != length(file)) {
if (length(dir) > 1L && length(file) > 1L) if (length(dir) > 1L && length(file) > 1L)
stop("file and dir lengths must match") stop("file and dir lengths must match")
if (length(file) > 1L) { if (length(file) > 1L) {
dir <- rep.int(dir, length(file)) dir <- rep.int(dir, length(file))
} else if (length(dir) > 1L) } else if (length(dir) > 1L)
file <- rep.int(file, length(dir)) file <- rep.int(file, length(dir))
else else
stop("check lengths of file and dir") stop("check lengths of file and dir")
} }
if (t.type == "guess" || missing(columns)) if (t.type == "guess" || missing(columns))
...@@ -198,18 +196,18 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -198,18 +196,18 @@ read_ts_tables <- function(file, dir, t.type = "guess",
strsplit(samp[[1]], ",", fixed = TRUE)[[1]]) strsplit(samp[[1]], ",", fixed = TRUE)[[1]])
columns <- tmp[-1L] columns <- tmp[-1L]
} }
if (t.type == "Date") { if (t.type == "Date") {
start <- if (missing(start)) start <- if (missing(start))
as.Date("1970-01-01") as.Date("1970-01-01")
else else
as.Date(start) as.Date(start)
end <- if (missing(end)) end <- if (missing(end))
previous_businessday(Sys.Date()) previous_businessday(Sys.Date())
else else
as.Date(end) as.Date(end)
timestamp <- seq(start, end , "1 day") timestamp <- seq(start, end , "1 day")
if (drop.weekends) if (drop.weekends)
timestamp <- timestamp[is_businessday(timestamp)] timestamp <- timestamp[is_businessday(timestamp)]
...@@ -219,7 +217,7 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -219,7 +217,7 @@ read_ts_tables <- function(file, dir, t.type = "guess",
as.POSIXct(Sys.Date() - 365) as.POSIXct(Sys.Date() - 365)
else else
as.POSIXct(start) as.POSIXct(start)
if (missing(end)) if (missing(end))
end <- as.POSIXct(previous_businessday(Sys.Date())) end <- as.POSIXct(previous_businessday(Sys.Date()))
else else
...@@ -227,20 +225,21 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -227,20 +225,21 @@ read_ts_tables <- function(file, dir, t.type = "guess",
timestamp <- seq(start, end , "1 sec") timestamp <- seq(start, end , "1 sec")
} else } else
stop("unknown ", sQuote("t.type")) stop("unknown ", sQuote("t.type"))
nc <- length(columns) nc <- length(columns)
results <- array(NA_real_, results <- array(NA_real_,
dim = c(length(timestamp), length(dfile)*nc)) dim = c(length(timestamp), length(dfile)*nc))
for (i in seq_along(dfile)) { for (i in seq_along(dfile)) {
tmp <- read.table(dfile[[i]], sep = ",", tmp <- read.table(dfile[[i]], sep = ",",
stringsAsFactors = FALSE, stringsAsFactors = FALSE,
header = TRUE, colClasses = "numeric") header = TRUE, colClasses = "numeric")
ii <- fmatch(tmp[[1L]], timestamp, nomatch = 0L) ii <- fmatch(tmp[[1L]], timestamp, nomatch = 0L)
tmp.names <- colnames(tmp) tmp.names <- colnames(tmp)
if (!all(columns %in% tmp.names)) { if (!all(columns %in% tmp.names)) {
warning("columns missing") warning("columns missing")
tmp <- cbind(tmp, rep(NA, sum(!(columns %in% tmp.names)))) tmp <- cbind(tmp, rep(NA, sum(!(columns %in% tmp.names))))
colnames(tmp) <- c(tmp.names, columns[!(columns %in% tmp.names)]) colnames(tmp) <- c(tmp.names,
columns[!(columns %in% tmp.names)])
} }
res <- tmp[ , columns, drop = FALSE][ii > 0L, ] res <- tmp[ , columns, drop = FALSE][ii > 0L, ]
if (!is.null(res)) if (!is.null(res))
...@@ -258,7 +257,7 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -258,7 +257,7 @@ read_ts_tables <- function(file, dir, t.type = "guess",
colnames[[i]] <- gsub("%file%", .file[[i]], colnames[[i]]) colnames[[i]] <- gsub("%file%", .file[[i]], colnames[[i]])
colnames[[i]] <- gsub("%column%", .columns[[i]], colnames[[i]]) colnames[[i]] <- gsub("%column%", .columns[[i]], colnames[[i]])
} }
} else if (backend == "monetdb") { } else if (backend == "monetdb") {
### ******************** ### ********************
...@@ -266,9 +265,9 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -266,9 +265,9 @@ read_ts_tables <- function(file, dir, t.type = "guess",
dir <- dbConnect(MonetDBLite::MonetDBLite(), dir) dir <- dbConnect(MonetDBLite::MonetDBLite(), dir)
on.exit(dbDisconnect(dir, shutdown = TRUE)) on.exit(dbDisconnect(dir, shutdown = TRUE))
} }
dbGetQuery(dir, "SELECT * FROM file;") dbGetQuery(dir, "SELECT * FROM file;")
} else } else
stop("unknown backend") stop("unknown backend")
...@@ -289,7 +288,7 @@ read_ts_tables <- function(file, dir, t.type = "guess", ...@@ -289,7 +288,7 @@ read_ts_tables <- function(file, dir, t.type = "guess",
ans ans
} else { } else {
stop("unknown ", sQuote("return.class")) stop("unknown ", sQuote("return.class"))
} }
} }
## x <- scan("~/tsdb/daily/cmcier", what= list(numeric(0), numeric(0)), ## x <- scan("~/tsdb/daily/cmcier", what= list(numeric(0), numeric(0)),
...@@ -323,7 +322,7 @@ file_info <- function(dir, file) { ...@@ -323,7 +322,7 @@ file_info <- function(dir, file) {
min.timestamp = NA, min.timestamp = NA,
max.timestamp = NA, max.timestamp = NA,
stringsAsFactors = FALSE) stringsAsFactors = FALSE)
for (i in seq_len(nf)) { for (i in seq_len(nf)) {
if (!res[["exists"]][i]) if (!res[["exists"]][i])
next next
...@@ -409,14 +408,14 @@ as.data.frame.ts_table <- function(x, ...@@ -409,14 +408,14 @@ as.data.frame.ts_table <- function(x,
timestamp <- attr(x, "timestamp") timestamp <- attr(x, "timestamp")
col <- attr(x, "columns") col <- attr(x, "columns")
if (!is.null(row.names)) { if (!is.null(row.names)) {
ans <- data.frame(unclass(x), stringsAsFactors = FALSE) ans <- data.frame(unclass(x), stringsAsFactors = FALSE)
row.names(ans) <- as.character(timestamp) row.names(ans) <- as.character(timestamp)
names(ans) <- col names(ans) <- col
} else { } else {
ans <- cbind(timestamp = timestamp, ans <- cbind(timestamp = timestamp,
data.frame(unclass(x), stringsAsFactors = FALSE)) data.frame(unclass(x), stringsAsFactors = FALSE))
names(ans) <- c("timestamp", col) names(ans) <- c("timestamp", col)
} }
ans ans
} }
...@@ -442,15 +441,14 @@ print.ts_table <- function(x, ...) { ...@@ -442,15 +441,14 @@ print.ts_table <- function(x, ...) {
if (nrow(x)) if (nrow(x))
cat(nrow(x), " rows [", cat(nrow(x), " rows [",
as.character(from_to[[1L]]), " -> ", as.character(from_to[[1L]]), " -> ",
as.character(from_to[[2]]), as.character(from_to[[2]]),
"]: ", "]: ",
paste(attr(x, "columns"), collapse = ", "), paste(attr(x, "columns"), collapse = ", "),
"\n", sep = "") "\n", sep = "")
else else
cat(nrow(x), " rows : ", cat(nrow(x), " rows : ",
paste(attr(x, "columns"), collapse = ", "), paste(attr(x, "columns"), collapse = ", "),
"\n", sep = "") "\n", sep = "")
invisible(x) invisible(x)
} }
...@@ -459,6 +457,5 @@ adjust_ts_table <- function(ts, dividends, splits, splits.first = TRUE) { ...@@ -459,6 +457,5 @@ adjust_ts_table <- function(ts, dividends, splits, splits.first = TRUE) {
} }
rm_ts_table <- function(file, dir, ..., trash.bin = ".trash.bin") { rm_ts_table <- function(file, dir, ..., trash.bin = ".trash.bin") {
} }
...@@ -429,23 +429,31 @@ In read_ts_tables(c("example1", "example2"), dir = "~/tsdb/daily", : ...@@ -429,23 +429,31 @@ In read_ts_tables(c("example1", "example2"), dir = "~/tsdb/daily", :
Attached to this matrix are several attributes: Attached to this matrix are several attributes:
- timestamp :: the timestamp, which can be either of class Date or of - timestamp :: a vector: the numeric representation of
class =POSIXct= the timestamp
- columns :: a character vector that provides the columns names - t.type :: character: the class of the original
timestamp, either =Date= or =POSIXct=
(There may be other attributes as well, but these two - columns :: a character vector that provides the
columns names
(There may be other attributes as well, but these three
are always present.) are always present.)
A =ts_table= is not meant as a time-series class. For A =ts_table= is not meant as a time-series class. For
most computations (plotting, calculation of statistics, most computations (plotting, calculation of statistics,
etc), the =ts_table= must first be coerced to zoo, xts, etc), the =ts_table= must first be coerced to zoo, xts,
dataframe or a similar data structure. dataframe or a similar data structure. Methods that
perform such coercions are responsible for converting
the numeric timestamp vector to an actual
timestamp. For this, they may use the internal function
=ttime= (translate time).
** The file format ** The file format
tsdb can store and load time-series data. The format it tsdb can store and load time-series data. The format
uses is plain CSV; a sample file may look as follows: it uses is plain CSV; a sample file may look as
follows:
#+BEGIN_EXAMPLE #+BEGIN_EXAMPLE
"timestamp","close" "timestamp","close"
...@@ -456,19 +464,23 @@ uses is plain CSV; a sample file my look as follows: ...@@ -456,19 +464,23 @@ uses is plain CSV; a sample file my look as follows:
17135,15 17135,15
#+END_EXAMPLE #+END_EXAMPLE
Thus, the file has a header line that gives the names Thus, the file has a header line that gives the
of the columns, with the first column always being names of the columns, with the first column always
named =timestamp=. being named =timestamp=.
The advantage of this plain format is that the data are The advantage of this plain format is that the data are
in no way dependent on =tsdb=. The files can be used in no way dependent on =tsdb=. The files can be used
and manipulated by other software as well. and manipulated by other software as well.
** Timestamps ** Timestamps
:PROPERTIES: :PROPERTIES:
:CUSTOM_ID: sec:timestamps :CUSTOM_ID: sec:timestamps
:END: :END:
Daily timestamps are represented as the number of days Two types of timestamps are supported: =Date= and
since 1 Jan 1970; intraday timestamps are the number of =POSIXct=. As part of a =ts_table=, timestamps are
seconds since 1 Jan 1970. always stored in their numeric representation: Daily
timestamps are represented as the number of days
since 1 Jan 1970; intraday timestamps are the number
of seconds since 1 Jan 1970.