Commit c5cbbcfb authored by Enrico Schumann's avatar Enrico Schumann

[as.ts_table] Add ts_table method

parent 831a3620
2019-03-11 Enrico Schumann <es@enricoschumann.net>
* R/functions.R (as.ts_table.ts_table): add method
2019-02-14 Enrico Schumann <es@enricoschumann.net>
* DESCRIPTION (Imports, Suggests): move package
......
......@@ -2,7 +2,7 @@ Package: tsdb
Type: Package
Title: Terribly-Simple Data Base for Time Series
Version: 0.6-0
Date: 2019-02-14
Date: 2019-03-11
Maintainer: Enrico Schumann <es@enricoschumann.net>
Authors@R: person(given = "Enrico", family = "Schumann",
role = c("aut", "cre"),
......
......@@ -7,14 +7,6 @@ export(
write_ts_table
)
## importFrom("DBI",
## "dbConnect",
## "dbDisconnect",
## "dbGetQuery",
## "dbQuoteIdentifier",
## "dbWriteTable"
## )
importFrom("datetimeutils",
"is_businessday",
"previous_businessday")
......@@ -28,6 +20,12 @@ importFrom("utils",
importFrom("zoo",
"zoo", "coredata", "index", "as.zoo")
S3method(print, ts_table)
S3method(as.ts_table, ts_table)
S3method(as.ts_table, zoo)
S3method(as.data.frame, ts_table)
S3method(as.matrix, ts_table)
S3method(as.zoo, ts_table)
S3method(print, file_info)
S3method(print, ts_table)
## The package uses code from Enrico Schumann's
## R package 'database'.
## Copyright Enrico Schumann 2010-2018
## ---------------- time
## Copyright Enrico Schumann 2010-2019
ttime <- function(x, from = "datetime", to = "numeric",
tz = "", strip.attr = TRUE,
......@@ -10,7 +8,9 @@ ttime <- function(x, from = "datetime", to = "numeric",
if (from == "datetime" && to == "numeric") {
if (strip.attr)
c(unclass(x)) else unclass(x)
c(unclass(x))
else
unclass(x)
} else if (from == "numeric" && to == "Date") {
class(x) <- "Date"
x
......@@ -28,31 +28,44 @@ ttime <- function(x, from = "datetime", to = "numeric",
## --------------------- ts_table
## Internal accessor: fetch the "timestamp" attribute of 'x'.
.timestamp <- function(x) {
    attr(x, "timestamp")
}

## Internal replacement function, so that
## '.timestamp(x) <- value' stores 'value' as the
## "timestamp" attribute and yields the modified object.
`.timestamp<-` <- function(x, value) {
    `attr<-`(x, "timestamp", value)
}
## Internal accessor: fetch the "columns" attribute of 'x'.
.columns <- function(x) {
    attr(x, "columns")
}

## Internal replacement function, so that
## '.columns(x) <- value' stores 'value' as the
## "columns" attribute and yields the modified object.
`.columns<-` <- function(x, value) {
    `attr<-`(x, "columns", value)
}
## Internal accessor: fetch the "t.type" attribute of 'x'.
.t.type <- function(x) {
    attr(x, "t.type")
}
`.t.type<-` <- function(x, value) {
attr(x, "t.type") <- value
x
## Create a 'ts_table': an unnamed matrix that carries its
## timestamps (in numeric form), the timestamp type and the
## column names as attributes.
##
##   data      : coerced with as.matrix; one row per timestamp
##   timestamp : Date, POSIXct or POSIXlt; one entry per row
##   columns   : column names; defaults to colnames of 'data'
##
## Rows are reordered so that the timestamps are sorted.
## Signals an error if 'timestamp' has an unsupported class,
## if no column names are available, or if the dimensions of
## 'data', 'timestamp' and 'columns' do not agree.
ts_table <- function(data, timestamp, columns) {

    ## Convert the timestamp to its numeric representation and
    ## remember what it was, so that it can be translated back.
    if (inherits(timestamp, "POSIXlt")) {
        timestamp <- ttime(as.POSIXct(timestamp))
        t.type <- "POSIXct"
    } else if (inherits(timestamp, "POSIXct")) {
        timestamp <- ttime(timestamp)
        t.type <- "POSIXct"
    } else if (inherits(timestamp, "Date")) {
        timestamp <- ttime(timestamp)
        t.type <- "Date"
    } else {
        ## TODO if character, match regexp and then coerce
        ##      to Date/POSIXct?
        stop(sQuote("timestamp"), " must be Date or POSIXt")
    }

    ans <- as.matrix(data)
    if (missing(columns))
        columns <- colnames(ans)
    ans <- unname(ans)

    if (is.null(columns))
        stop("no column names, and ", sQuote("columns"), " not provided")
    ## the counts may differ in either direction
    if (ncol(ans) != length(columns))
        stop("number of columns (", ncol(ans),
             ") and number of column names (", length(columns),
             ") differ")
    ## without this check, a mismatch would only surface (if at
    ## all) as a subscript error when rows are reordered below
    if (nrow(ans) != length(timestamp))
        stop("number of rows (", nrow(ans),
             ") and length of ", sQuote("timestamp"),
             " (", length(timestamp), ") differ")

    if (is.unsorted(timestamp)) {
        ii <- order(timestamp)
        timestamp <- timestamp[ii]
        ans <- ans[ii, , drop = FALSE]
    }

    attr(ans, "timestamp") <- timestamp
    attr(ans, "t.type") <- t.type
    attr(ans, "columns") <- columns
    class(ans) <- "ts_table"
    ans
}
write_ts_table <- function(ts, dir, file,
add = FALSE,
overwrite = FALSE,
......@@ -130,7 +143,7 @@ write_ts_table <- function(ts, dir, file,
sep = ",")
}
} else if (backend == "monetdb") {
if (!inherits(dir, "MonetDBEmbeddedConnection")) {
dir <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dir)
on.exit(DBI::dbDisconnect(dir, shutdown = TRUE))
......@@ -308,101 +321,37 @@ read_ts_tables <- function(file, dir, t.type = "guess",
}
}
## x <- scan("~/tsdb/daily/cmcier", what= list(numeric(0), numeric(0)),
## skip = 1, sep = ",", multi.line=FALSE)
## read.table("~/tsdb/daily/cmcier", colClasses = "numeric", header = TRUE)
## List the files in directory 'dir' (default: the current
## working directory) and return the file names as a character
## vector of class 'dir_info'.
dir_info <- function(dir = getwd()) {
    ## 'dir()' without arguments always lists the working
    ## directory; pass the requested path explicitly
    res <- list.files(path = dir)
    class(res) <- "dir_info"
    ## return the object itself, not the value of the
    ## class assignment
    res
}
print.dir_info <- function(x, ...) {
print(unclass(x))
## Print a one-line summary of a ts_table: the number of rows,
## the first and last timestamp (omitted when there are no
## rows) and the column names.  Returns 'x' invisibly.
print.ts_table <- function(x, ...) {
    stamps <- .timestamp(x)
    ## translate the numeric range back to the original
    ## timestamp type
    span <- if (length(stamps))
                ttime(range(stamps), "numeric", .t.type(x))
            else
                c(NA, NA)

    n <- nrow(x)
    cols <- paste(attr(x, "columns"), collapse = ", ")
    if (n) {
        first <- as.character(span[[1L]])
        last <- as.character(span[[2L]])
        cat(n, " rows [", first, " -> ", last, "]: ", cols,
            "\n", sep = "")
    } else {
        cat(n, " rows : ", cols, "\n", sep = "")
    }
    invisible(x)
}
## Collect information about ts_table files.
##
##   dir  : optional directory; when missing, 'file' is used
##          as the path
##   file : character vector of file names
##
## Returns a data.frame of class 'file_info' with one row per
## file: whether the file exists, the number of rows, the
## timestamp type and the smallest/largest timestamp.  Files
## that do not exist or cannot be read keep NA in those
## columns.  Column 'columns' is initialised with empty strings
## and is not filled in by this function.
file_info <- function(dir, file) {
    dfile <- if (missing(dir))
                 file
             else
                 file.path(dir, file)
    nf <- length(dfile)

    ## rep(NA, nf) instead of a scalar NA, so that the
    ## data.frame can also be built for zero files
    res <- data.frame(file = file,
                      dir_file = dfile,
                      exists = file.exists(dfile),
                      columns = character(nf),
                      nrows = rep(NA, nf),
                      t.type = rep(NA, nf),
                      min.timestamp = rep(NA, nf),
                      max.timestamp = rep(NA, nf),
                      stringsAsFactors = FALSE)

    for (i in seq_len(nf)) {
        if (!res[["exists"]][i])
            next
        ## best effort: an unreadable file is skipped and
        ## keeps its NA entries
        fi <- try(read_ts_tables(dfile[i], return.class = NULL),
                  silent = TRUE)
        if (inherits(fi, "try-error"))
            next
        res[["nrows"]][i] <- length(fi$timestamp)
        if (length(fi$timestamp)) {
            res[["min.timestamp"]][i] <- min(fi$timestamp)
            res[["max.timestamp"]][i] <- max(fi$timestamp)
            ## a class vector may have several elements (e.g.
            ## POSIXct/POSIXt); store only the first one
            res[["t.type"]][i] <- class(fi$timestamp)[1L]
        }
    }
    class(res) <- c("file_info", "data.frame")
    res
}
## Print a file_info object as a data.frame; returns 'x'
## invisibly.
print.file_info <- function(x, ...) {
    ## Call the data.frame method directly: 'print(x, ...)'
    ## would dispatch to print.file_info again and recurse
    ## without end.
    print.data.frame(x, ...)
    invisible(x)
}
## Create a 'ts_table': an unnamed matrix that carries its
## timestamps (in numeric form), the timestamp type and the
## column names as attributes.
##
##   data      : coerced with as.matrix; one row per timestamp
##   timestamp : Date, POSIXct or POSIXlt; one entry per row
##   columns   : column names; defaults to colnames of 'data'
##
## Rows are reordered so that the timestamps are sorted.
## Signals an error if 'timestamp' has an unsupported class,
## if no column names are available, or if the dimensions of
## 'data', 'timestamp' and 'columns' do not agree.
ts_table <- function(data, timestamp, columns) {

    ## Convert the timestamp to its numeric representation and
    ## remember what it was, so that it can be translated back.
    if (inherits(timestamp, "POSIXlt")) {
        timestamp <- ttime(as.POSIXct(timestamp))
        t.type <- "POSIXct"
    } else if (inherits(timestamp, "POSIXct")) {
        timestamp <- ttime(timestamp)
        t.type <- "POSIXct"
    } else if (inherits(timestamp, "Date")) {
        timestamp <- ttime(timestamp)
        t.type <- "Date"
    } else {
        ## TODO if character, match regexp and then coerce
        ##      to Date/POSIXct?
        stop(sQuote("timestamp"), " must be Date or POSIXt")
    }

    ans <- as.matrix(data)
    if (missing(columns))
        columns <- colnames(ans)
    ans <- unname(ans)

    if (is.null(columns))
        stop("no column names, and ", sQuote("columns"), " not provided")
    ## the counts may differ in either direction
    if (ncol(ans) != length(columns))
        stop("number of columns (", ncol(ans),
             ") and number of column names (", length(columns),
             ") differ")
    ## without this check, a mismatch would only surface (if at
    ## all) as a subscript error when rows are reordered below
    if (nrow(ans) != length(timestamp))
        stop("number of rows (", nrow(ans),
             ") and length of ", sQuote("timestamp"),
             " (", length(timestamp), ") differ")

    if (is.unsorted(timestamp)) {
        ii <- order(timestamp)
        timestamp <- timestamp[ii]
        ans <- ans[ii, , drop = FALSE]
    }

    attr(ans, "timestamp") <- timestamp
    attr(ans, "t.type") <- t.type
    attr(ans, "columns") <- columns
    class(ans) <- "ts_table"
    ans
}
## --------------------- COERCION
## S3 generic: coerce 'x' to a ts_table.  The work is done by
## the method selected for the class of 'x'.
as.ts_table <- function(x, ...)
    UseMethod("as.ts_table")
## Method for objects that already are ts_tables: the input
## is returned unchanged.
as.ts_table.ts_table <- function(x, ...) {
    x
}
as.ts_table.zoo <- function(x, columns, ...) {
cols <- if (missing(columns))
colnames(x)
......@@ -449,26 +398,51 @@ as.matrix.ts_table <- function(x, ...) {
ans
}
print.ts_table <- function(x, ...) {
tmp <- .timestamp(x)
from_to <- if (length(tmp))
ttime(range(tmp), "numeric", .t.type(x))
else
c(NA, NA)
if (nrow(x))
cat(nrow(x), " rows [",
as.character(from_to[[1L]]), " -> ",
as.character(from_to[[2]]),
"]: ",
paste(attr(x, "columns"), collapse = ", "),
"\n", sep = "")
else
cat(nrow(x), " rows : ",
paste(attr(x, "columns"), collapse = ", "),
"\n", sep = "")
invisible(x)
## --------------------- file_info
## Collect information about ts_table files.
##
##   dir  : optional directory; when missing, 'file' is used
##          as the path
##   file : character vector of file names
##
## Returns a data.frame of class 'file_info' with one row per
## file: whether the file exists, the number of rows, the
## timestamp type and the smallest/largest timestamp.  Files
## that do not exist or cannot be read keep NA in those
## columns.  Column 'columns' is initialised with empty strings
## and is not filled in by this function.
file_info <- function(dir, file) {
    dfile <- if (missing(dir))
                 file
             else
                 file.path(dir, file)
    nf <- length(dfile)

    ## rep(NA, nf) instead of a scalar NA, so that the
    ## data.frame can also be built for zero files
    res <- data.frame(file = file,
                      dir_file = dfile,
                      exists = file.exists(dfile),
                      columns = character(nf),
                      nrows = rep(NA, nf),
                      t.type = rep(NA, nf),
                      min.timestamp = rep(NA, nf),
                      max.timestamp = rep(NA, nf),
                      stringsAsFactors = FALSE)

    for (i in seq_len(nf)) {
        if (!res[["exists"]][i])
            next
        ## best effort: an unreadable file is skipped and
        ## keeps its NA entries
        fi <- try(read_ts_tables(dfile[i], return.class = NULL),
                  silent = TRUE)
        if (inherits(fi, "try-error"))
            next
        res[["nrows"]][i] <- length(fi$timestamp)
        if (length(fi$timestamp)) {
            res[["min.timestamp"]][i] <- min(fi$timestamp)
            res[["max.timestamp"]][i] <- max(fi$timestamp)
            ## a class vector may have several elements (e.g.
            ## POSIXct/POSIXt); store only the first one
            res[["t.type"]][i] <- class(fi$timestamp)[1L]
        }
    }
    class(res) <- c("file_info", "data.frame")
    res
}
## Print a file_info object as a data.frame; returns 'x'
## invisibly.
print.file_info <- function(x, ...) {
    ## Call the data.frame method directly: 'print(x, ...)'
    ## would dispatch to print.file_info again and recurse
    ## without end.
    print.data.frame(x, ...)
    invisible(x)
}
## --------------------- internal/incomplete functions
## Placeholder, not yet implemented: no argument is used
## and the result is always NULL.
adjust_ts_table <- function(ts, dividends, splits, splits.first = TRUE) {
    NULL
}
......@@ -476,3 +450,38 @@ adjust_ts_table <- function(ts, dividends, splits, splits.first = TRUE) {
## Placeholder, not yet implemented: no argument is used
## and the result is always NULL.
rm_ts_table <- function(file, dir, ..., trash.bin = ".trash.bin") {
    NULL
}
## List the files in directory 'dir' (default: the current
## working directory) and return the file names as a character
## vector of class 'dir_info'.
dir_info <- function(dir = getwd()) {
    ## 'dir()' without arguments always lists the working
    ## directory; pass the requested path explicitly
    res <- list.files(path = dir)
    class(res) <- "dir_info"
    res
}
## Print a dir_info object as a plain vector (class
## attribute removed); returns 'x' invisibly.
print.dir_info <- function(x, ...) {
    plain <- unclass(x)
    print(plain)
    invisible(x)
}
## Internal accessor: fetch the "timestamp" attribute of 'x'.
.timestamp <- function(x) {
    attr(x, "timestamp")
}

## Internal replacement function, so that
## '.timestamp(x) <- value' stores 'value' as the
## "timestamp" attribute and yields the modified object.
`.timestamp<-` <- function(x, value) {
    `attr<-`(x, "timestamp", value)
}
## Internal accessor: fetch the "columns" attribute of 'x'.
.columns <- function(x) {
    attr(x, "columns")
}

## Internal replacement function, so that
## '.columns(x) <- value' stores 'value' as the
## "columns" attribute and yields the modified object.
`.columns<-` <- function(x, value) {
    `attr<-`(x, "columns", value)
}
## Internal accessor: fetch the "t.type" attribute of 'x'.
.t.type <- function(x) {
    attr(x, "t.type")
}

## Internal replacement function, so that
## '.t.type(x) <- value' stores 'value' as the
## "t.type" attribute and yields the modified object.
`.t.type<-` <- function(x, value) {
    `attr<-`(x, "t.type", value)
}
......@@ -114,18 +114,19 @@
* About tsdb
A terribly-simple data base for numeric time series. All series
are saved as CSV files. The package offers utilities
for saving files in a standardised format, and for
retrieving and joining data.
A terribly-simple data base for numeric time
series. All series are saved as CSV files. The package
offers utilities for saving files in a standardised
format, and for retrieving and joining data.
** Good things about tsdb
- no setup needed, no system dependencies
(i.e. external software, such as a database)
- completely portable; moving from one computer to
another requires no effort (the only thing to take
care of is file encoding)
another requires no effort other than copying the
files (the only thing to take care of is file
encoding if non-ASCII column names are used)
- data usable by other software
......@@ -172,7 +173,7 @@ ts
: 5 rows [2016-01-01 -> 2016-01-05]: A
Note that we had to provide a column name (=A=) for the
data. That is not optional. It is one of the things
data. This is not optional. It is one of the things
that =ts_table= enforces. Another is that timestamps
need to be of class =Date= or =POSIXct=.
......@@ -199,7 +200,8 @@ The written file will look like this:
You may notice that the dates have been replaced by
numbers. The mapping between these numbers and calendar
times is described later, when we discuss the
representation of timestamps.
representation of timestamps. (But if you can't wait:
it is the number of days since 1 January 1970.)
Let us write a second file. This time, we use
=ts_table= directly.
......@@ -268,6 +270,7 @@ The written file looks like this:
16810,10,20
#+END_EXAMPLE
** Reading data
Use the function =read_ts_tables=.
......@@ -324,8 +327,9 @@ More convenient may be to specify a =return.class=.
But wait. We provided and wrote to the file values for
1 January to 5 January. But we only got values for 1, 4
and 5 January. The reason is that tsdb was written with
financial data in mind, and on weekends there are no prices.
and 5 January. The reason is that =tsdb= was written
with financial data in mind, and on weekends there are
no prices.
#+BEGIN_SRC R :session *R* :results output :exports both
weekdays(as.Date("2016-1-1")+0:4)
#+END_SRC
......@@ -379,9 +383,10 @@ a single table, but we read tables.
10 2016-01-10 NA 20
#+end_example
The column names of the returned object consist of the filepaths and
the column, which may be more information than we actually want. The
argument =column.name= specifies the format; its default is
The column names of the returned object consist of the
filepaths and the column, which may be more information
than we actually want. The argument =column.name=
specifies the format; its default is
=%dir%/%file%::%column%=.
#+BEGIN_SRC R :session *R* :results output :exports both
read_ts_tables(c("example1", "example2"),
......@@ -408,8 +413,8 @@ argument =column.name= specifies the format; its default is
#+end_example
Missing values are by default set to =NA=. That happens even for
missing columns, with a warning, though.
Missing values are by default set to =NA=. That happens
even for missing columns, with a warning though.
#+BEGIN_SRC R :session *R* :results output :exports both
read_ts_tables(c("example1", "example2"),
dir = "~/tsdb/daily",
......@@ -458,8 +463,8 @@ In read_ts_tables(c("example1", "example2"), dir = "~/tsdb/daily", :
- columns :: a character vector that provides the
column names
(There may be other attributes as well, but these three
are always present.)
There may be other attributes as well, but these three
are always present.
A =ts_table= is not meant as a time-series class. For
most computations (plotting, calculation of statistics,
......@@ -467,15 +472,15 @@ etc), the =ts_table= must first be coerced to =zoo=, =xts=,
a data-frame or a similar data structure. Methods that
perform such coercions are responsible for converting
the numeric timestamp vector to an actual
timestamp. For this, they may use the internal function
timestamp. For this, they may use the function
=ttime=, whose pronunciation may remind you of a hot
beverage, but which really stands for =translate time=.
** The file format
tsdb can store and load time-series data. The format
it uses is plain CSV; a sample file may look as
=tsdb= can store and load time-series data. The format
it uses is plain CSV. A sample file may look as
follows:
#+BEGIN_EXAMPLE
......@@ -491,19 +496,19 @@ beverage, but which really stands for =translate time=.
names of the columns, with the first column always
being named =timestamp=.
The advantage of this plain format is that the data are
in no way dependent on =tsdb=. The files can be used
and manipulated by other software as well.
The advantage of this plain format is that the data
are in no way dependent on =tsdb=. The files can be
used and manipulated by other software as well.
** Timestamps
:PROPERTIES:
:CUSTOM_ID: sec:timestamps
:CUSTOM_ID: timestamps
:END:
Two types of timestamps are supported: =Date= and
=POSIXct=. As part of a =ts_table=, timestamps are
always stored in their numeric representation: Daily
always stored in their numeric representation: daily
timestamps are represented as the number of days
since 1 Jan 1970; intraday timestamps are the number
of seconds since 1 Jan 1970.
......@@ -13,7 +13,6 @@
\usage{
file_info(dir, file)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
\item{dir}{
character
......@@ -24,13 +23,23 @@ file_info(dir, file)
}
\details{
Experimental.
Provide information, such as number of entries, of
specified files.
It is recommended that code which uses the returned
information to alter or write tables explicitly
check whether a table exists (column \code{exists} in
the returned \code{\link{data.frame}}). For instance,
a value of \code{\link{NA}} for \code{min.timestamp}
would occur for a non-existing file, but also if the
file could not be read for some reason.
}
\value{
An object of type \code{file_info}, which is a
\code{data.frame}.
\code{data.frame} with information such as whether a
file exists, minimum and maximum timestamp, and more.
}
\author{
......@@ -41,6 +50,5 @@ file_info(dir, file)
}
\examples{
\dontrun{
file_info(dir = "/tsdb",
c("table1", "table2"))}
file_info(dir = "/tsdb", c("table1", "table2"))}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment