Commit 7d085029 authored by Enrico Schumann's avatar Enrico Schumann

Version 0.6-1

o various documentation updates

o the "Tutorial" PDF has been removed, but
  its content may be found at
  https://gitlab.com/enricoschumann/tsdb/README.org and
  https://github.com/enricoschumann/tsdb/README.org
parent 86b07277
......@@ -2,7 +2,7 @@ Package: tsdb
Type: Package
Title: Terribly-Simple Data Base for Time Series
Version: 0.6-1
Date: 2019-03-24
Date: 2019-03-26
Maintainer: Enrico Schumann <es@enricoschumann.net>
Authors@R: person(given = "Enrico", family = "Schumann",
role = c("aut", "cre"),
......@@ -12,7 +12,7 @@ Description: A terribly-simple data base for numeric
time series, written purely in R, so no external
database-software is needed. Series are stored in
plain-text files (the most-portable and enduring file
type) in CSV format; timestamps are encoded in R's
type) in CSV format; timestamps are encoded using R's
native numeric representation for Date/POSIXct, which
makes them fast to parse, but keeps them accessible
with other software. The package provides tools for
......@@ -24,3 +24,6 @@ Description: A terribly-simple data base for numeric
License: GPL-3
Imports: datetimeutils, fastmatch, utils, zoo
Suggests: DBI, MonetDBLite, data.table
URL: http://enricoschumann.net/R/packages/tsdb,
https://github.com/enricoschumann/tsdb,
https://gitlab.com/enricoschumann/tsdb
\ No newline at end of file
v0.6-1 (2019-03-26)
o various documentation updates
o the "Tutorial" PDF has been removed, but
its content may be found at
https://gitlab.com/enricoschumann/tsdb/blob/master/README.org and
https://github.com/enricoschumann/tsdb/blob/master/README.org
v0.6-0 (2019-03-21)
o fixed: 'write_ts_table' with option 'add' would
......@@ -23,8 +32,8 @@ v0.6-0 (2019-03-21)
v0.5-0 (2017-10-24)
o fixed: write_ts_table does now also write empty
files
o fixed: 'write_ts_table' does now also write
empty files
o write_ts_table: first argument has been renamed 'ts'
......@@ -32,7 +41,7 @@ v0.5-0 (2017-10-24)
o read_ts_tables: rename argument 'column.name' to
'column.names' (plural)
o new ts_table method for as.matrix
v0.4-1 (2017-02-06)
......@@ -53,7 +62,7 @@ v0.2-1 (2016-12-09)
o A Tutorial is now installed with the package.
It can be accessed via
browseURL(system.file("Tutorial/README.html", package = "tsdb"))
browseURL(system.file("Tutorial/README.pdf", package = "tsdb"))
......@@ -65,7 +74,7 @@ v0.2-0 (2016-12-05)
o New method 'as.zoo.ts_table' added.
o Tutorial added (see file README.org).
v0.1-0 (2016-11-24)
o Initial version of package.
......@@ -264,16 +264,16 @@ read_ts_tables <- function(file, dir, t.type = "guess",
dim = c(length(timestamp), length(dfile)*nc))
for (i in seq_along(dfile)) {
if (is.null(read.fn))
tmp <- data.table::fread(dfile[[i]],
sep = ",",
header = TRUE,
data.table = FALSE)
else if (read.fn == "fread")
tmp <- read.table(dfile[[i]],
sep = ",",
stringsAsFactors = FALSE,
header = TRUE,
colClasses = "numeric")
else if (read.fn == "fread")
tmp <- data.table::fread(dfile[[i]],
sep = ",",
header = TRUE,
data.table = FALSE)
else
stop("unknown ", sQuote("read.fn"))
ii <- fmatch(tmp[[1L]], timestamp, nomatch = 0L)
......@@ -319,22 +319,23 @@ read_ts_tables <- function(file, dir, t.type = "guess",
list(data = results,
timestamp = ttime(timestamp, from = "numeric", t.type),
columns = rep(columns, each = length(dfile)),
file.path = paste(rep(dfile, each = length(columns)), columns, sep = "::"))
file.path = paste(rep(dfile, each = length(columns)),
columns, sep = "::"))
} else if (return.class == "zoo") {
if (!requireNamespace("zoo"))
stop("package ", sQuote("zoo"), " not available")
if (!is.null(dim(results)))
colnames(results) <- colnames
zoo(results, timestamp)
zoo(results, ttime(timestamp, from = "numeric", t.type))
} else if (return.class == "data.frame") {
ans <- data.frame(timestamp, results)
ans <- data.frame(ttime(timestamp, from = "numeric", t.type), results)
colnames(ans) <- c("timestamp", colnames)
ans
} else if (return.class == "ts_table") {
ans <- as.matrix(ans)
dimnames(ans) <- NULL
attr(ans, "t.type") <- t.type
attr(ans, "timestamp") <- ttime(timestamp)
attr(ans, "timestamp") <- timestamp
attr(ans, "columns") <- columns
class(ans) <- "ts_table"
ans
......@@ -365,49 +366,13 @@ print.ts_table <- function(x, ...) {
## List the files in a directory.
##
## dir: path of the directory to list; defaults to the current
##      working directory.
##
## Returns a character vector of file names with class "dir_info".
dir_info <- function(dir = getwd()) {
    ## Pass the argument explicitly: a bare 'dir()' call resolves to
    ## the base function 'dir' (R skips non-function bindings when it
    ## looks up a called name), so the argument would be ignored and
    ## the working directory would always be listed.
    res <- list.files(dir)
    class(res) <- "dir_info"
    res
}
## Print method for objects of class "dir_info".
##
## x:   a "dir_info" object
## ...: further arguments, passed on to 'print'
##
## Prints the underlying vector once (without the class attribute)
## and returns 'x' invisibly, as print methods conventionally do.
print.dir_info <- function(x, ...) {
    print(unclass(x), ...)
    invisible(x)
}
## Collect information about time-series files.
##
## dir:  optional directory; when missing, 'file' is used as given
## file: character vector of file names
##
## Returns a data.frame of class c("file_info", "data.frame") with
## one row per file and the columns 'file', 'dir_file', 'exists',
## 'columns', 'nrows', 't.type', 'min.timestamp' and
## 'max.timestamp'.  Rows for files that do not exist, or that
## 'read_ts_tables' fails to read, keep their initial values.
##
## NOTE(review): 'columns' is initialised with empty strings but
## never filled in by this function -- confirm whether that is
## intended.
file_info <- function(dir, file) {
    dfile <- if (missing(dir))
                 file
             else
                 file.path(dir, file)
    nf <- length(dfile)

    ## rep(NA, nf) instead of a scalar NA, so that zero-length
    ## input yields a zero-row data.frame instead of an error
    res <- data.frame(file = file,
                      dir_file = dfile,
                      exists = file.exists(dfile),
                      columns = character(nf),
                      nrows = rep(NA, nf),
                      t.type = rep(NA, nf),
                      min.timestamp = rep(NA, nf),
                      max.timestamp = rep(NA, nf),
                      stringsAsFactors = FALSE)

    for (i in seq_len(nf)) {
        if (!res[["exists"]][i])
            next

        ## an unreadable file must not abort the whole summary
        fi <- try(read_ts_tables(dfile[i], return.class = NULL),
                  silent = TRUE)
        if (inherits(fi, "try-error"))
            next

        res[["nrows"]][i] <- length(fi$timestamp)
        if (length(fi$timestamp)) {
            res[["min.timestamp"]][i] <- min(fi$timestamp)
            res[["max.timestamp"]][i] <- max(fi$timestamp)
            ## class() may have length > 1 (e.g. for POSIXct);
            ## keep only the first element, which avoids a
            ## replacement-length warning
            res[["t.type"]][i] <- class(fi$timestamp)[1L]
        }
    }
    class(res) <- c("file_info", "data.frame")
    res
}
## Print method for objects of class "file_info".
##
## x:   a "file_info" object (a data.frame with an extra class)
## ...: further arguments, passed on to 'print.data.frame'
##
## Returns 'x' invisibly.
print.file_info <- function(x, ...) {
    ## Call the data.frame method directly: 'print(x, ...)' would
    ## dispatch to this method again and recurse without end.
    print.data.frame(x, ...)
    invisible(x)
}
## --------------------- COERCION
......@@ -445,7 +410,8 @@ as.data.frame.ts_table <- function(x,
names(ans) <- col
} else {
ans <- cbind(timestamp = timestamp,
data.frame(unclass(x), stringsAsFactors = FALSE))
data.frame(unclass(x),
stringsAsFactors = FALSE))
names(ans) <- c("timestamp", col)
}
ans
......@@ -487,7 +453,8 @@ file_info <- function(dir, file) {
for (i in seq_len(nf)) {
if (!res[["exists"]][i])
next
fi <- try(read_ts_tables(dfile[i], return.class = NULL), silent = TRUE)
fi <- try(read_ts_tables(dfile[i], return.class = NULL),
silent = TRUE)
if (inherits(fi, "try-error"))
next
res[["nrows"]][i] <- length(fi$timestamp)
......@@ -502,7 +469,8 @@ file_info <- function(dir, file) {
}
## Print method for objects of class "file_info".
##
## x:   a "file_info" object (a data.frame with an extra class)
## ...: further arguments, passed on to 'print.data.frame'
##
## Returns 'x' invisibly.
print.file_info <- function(x, ...) {
    ## Only the data.frame method is called: a generic
    ## 'print(x, ...)' would dispatch to this method again and
    ## recurse without end.
    print.data.frame(x, ...)
    invisible(x)
}
......@@ -517,17 +485,6 @@ rm_ts_table <- function(file, dir, ..., trash.bin = ".trash.bin") {
}
## List the files in a directory.
##
## dir: path of the directory to list; defaults to the current
##      working directory.
##
## Returns a character vector of file names with class "dir_info".
dir_info <- function(dir = getwd()) {
    ## Pass the argument explicitly: a bare 'dir()' call resolves to
    ## the base function 'dir' (R skips non-function bindings when it
    ## looks up a called name), so the argument would be ignored and
    ## the working directory would always be listed.
    res <- list.files(dir)
    class(res) <- "dir_info"
    res
}
## Print method for objects of class "dir_info".
##
## x:   a "dir_info" object
## ...: further arguments, passed on to 'print'
##
## Prints the underlying vector (without the class attribute) and
## returns 'x' invisibly.
print.dir_info <- function(x, ...) {
    ## forward '...' so that options such as 'quote' take effect
    print(unclass(x), ...)
    invisible(x)
}
## Extract the "timestamp" attribute of 'x'; NULL if there is none.
## 'exact = TRUE' switches off the partial matching that 'attr' does
## by default, so an attribute whose name merely starts with
## "timestamp" is never returned by mistake.
.timestamp <- function(x)
    attr(x, "timestamp", exact = TRUE)
......
## write nf time-series to files, then read/merge them
nf <- 1000 ## number of files
nd <- 3700 ## number of days per time-series
nf <- 100 ## number of files
nd <- 6000 ## number of days per time-series
library("tsdb")
......@@ -12,6 +12,7 @@ x <- 1:nd
z0 <- zoo(x, as.Date("2007-12-31") + x)
trials <- 5
t <- system.time(
for (i in 1:trials ) {
for (i in 1:nf) {
......@@ -22,11 +23,10 @@ t <- system.time(
)
t[[3]]/trials
t <- system.time(
for (i in 1:trials ) {
read_ts_tables(as.character(1:nf), dir = d,
start = "2010-1-1", end = "2015-12-31",
start = "2007-1-1", end = "2016-12-31",
return.class = "zoo", column.names = "%file%")
}
)
......@@ -35,9 +35,9 @@ t[[3]]/trials
t <- system.time(
for (i in 1:trials ) {
read_ts_tables(as.character(1:nf), dir = d,
start = "2010-1-1", end = "2015-12-31",
start = "2007-1-1", end = "2016-12-31",
return.class = "zoo", column.names = "%file%",
fread = TRUE)
read.fn = "fread")
}
)
t[[3]]/trials
......@@ -2,9 +2,9 @@
test.ts_table <- function() {
require("RUnit")
require("tsdb")
require("zoo")
## library("RUnit")
## library("tsdb")
## library("zoo", warn.conflicts = FALSE)
y <- ts_table(11:15, as.Date("2016-1-1")-5:1, "close")
checkEquals(y,
......@@ -50,9 +50,9 @@ test.ts_table <- function() {
test.read_ts_tables <- function() {
library("RUnit")
library("tsdb")
library("zoo")
## library("RUnit")
## library("tsdb")
## library("zoo", warn.conflicts = FALSE)
x <- ts_table(data = 11:15,
timestamp = as.Date("2016-1-1") + 1:5,
columns = "A")
......@@ -69,12 +69,32 @@ test.read_ts_tables <- function() {
tmp <- read_ts_tables(c("A", "BA"), dir, columns = c("A"),
start = "2016-1-1", drop.weekends = FALSE)
checkEquals(tmp[[1]],
checkEquals(tmp$data,
structure(c(11, 12, 13, 14, 15, 6, 7, 8, 9, 10),
.Dim = c(5L, 2L)))
checkEquals(tmp$timestamp,
structure(c(16802, 16803, 16804,
16805, 16806),
class = "Date"))
## check POSIXct
tmp <- read_ts_tables(c("A", "BA"), dir, columns = c("A"),
start = "2016-1-1",
drop.weekends = FALSE,
return.class = "zoo")
colnames(tmp) <- c("A1", "A2")
checkEquals(tmp,
structure(c(11, 12, 13, 14, 15, 6, 7, 8, 9, 10),
.Dim = c(5L, 2L),
.Dimnames = list(NULL, c("A1", "A2")),
index = structure(
c(16802, 16803, 16804, 16805, 16806),
class = "Date"),
class = "zoo"))
## check POSIXct
z1 <- ts_table(11:15,
as.POSIXct("2016-1-1 10:00:00", tz = "UTC")+0:4,
"close")
......@@ -86,7 +106,7 @@ test.read_ts_tables <- function() {
z12 <- read_ts_tables(c("X1", "X2"), dir, columns = "close",
start = as.POSIXct("2016-1-1 10:00:00", tz = "UTC"),
end = as.POSIXct("2016-1-1 10:00:20", tz = "UTC"))
checkEquals(z12$data,
structure(c(11, 12, 13, 14, 15, NA,
NA, 1, 2, 3, 4, 5),
......@@ -94,7 +114,7 @@ test.read_ts_tables <- function() {
checkEquals(z12$timestamp,
as.POSIXct("2016-1-1 10:00:00", tz = "UTC")+0:5)
z12 <- read_ts_tables(c("X1", "X2"), dir, columns = "close",
start = "2016-1-1 11:00:00",
end = "2016-1-1 11:00:20")
......@@ -106,27 +126,22 @@ test.read_ts_tables <- function() {
checkEquals(z12$timestamp,
as.POSIXct("2016-1-1 10:00:00", tz = "UTC")+0:5)
## check empty file
writeLines('"timestamp","close"', file.path(dir, "empty"))
em <- read_ts_tables("empty", dir)
checkEquals(em$timestamp, structure(numeric(0), class = "Date"))
checkEquals(em$data, structure(numeric(0), .Dim = 0:1))
}
test.write_ts_table <- function() {
## require("RUnit")
## require("tsdb")
## require("zoo")
## library("RUnit")
## library("tsdb")
## library("zoo")
dir <- tempdir()
x <- ts_table(data = 11:15,
......@@ -319,15 +334,15 @@ test.write_ts_table <- function() {
test.zoo <- function() {
## require("RUnit")
## require("tsdb")
library("zoo", warn.conflicts = FALSE)
## library("RUnit")
## library("tsdb")
## library("zoo", warn.conflicts = FALSE)
y <- ts_table(11:15, as.Date("2016-1-1")-5:1, "close")
checkEqualsNumeric(zoo::as.zoo(y),
zoo::zoo(as.matrix(y), as.Date("2016-1-1")-5:1))
y <- zoo(11:15, as.Date("2016-1-1")-5:1)
y <- zoo::zoo(11:15, as.Date("2016-1-1")-5:1)
checkEquals(as.ts_table(y, columns = "close"),
structure(11:15,
.Dim = c(5L, 1L),
......
......@@ -2,7 +2,7 @@
\alias{as.ts_table}
\alias{as.ts_table.zoo}
\title{
Coerce To ts_table
Coerce to ts_table
}
\description{
Coerce objects to \code{ts_table}
......@@ -24,10 +24,13 @@ as.ts_table(x, ...)
}
}
\details{
A generic function.
A generic function for coercing objects to class
\code{\link{ts_table}}.
}
\value{
A ts_table.
a \code{\link{ts_table}}
}
\author{
Enrico Schumann
......@@ -36,9 +39,7 @@ as.ts_table(x, ...)
\code{\link{read_ts_tables}}
}
\examples{
require("zoo")
library("zoo")
as.ts_table(zoo(1:5, Sys.Date()-5:1), ## note that the "columns"
columns = "value") ## must be specified
}
......@@ -7,8 +7,7 @@
Provides information about data stored in file:
columns, number of observations, range of timestamps.
}
\usage{
file_info(dir, file)
......@@ -49,6 +48,8 @@ file_info(dir, file)
\code{\link{ts_table}}
}
\examples{
\dontrun{
file_info(dir = "/tsdb", c("table1", "table2"))}
ts <- ts_table(1:3, as.Date("2018-12-3") + 1:3, columns = "A")
d <- tempdir()
write_ts_table(ts, file = "temp", dir = d)
file_info(d, "temp")
}
\name{read_ts_tables}
\alias{read_ts_tables}
\title{
Read Data from Files
Read Time-Series Data from Files
}
\description{
Read time-series data from files.
Read time-series data from files and merge them.
}
\usage{
read_ts_tables(file, dir, t.type = "guess",
......@@ -33,41 +35,43 @@ read_ts_tables(file, dir, t.type = "guess",
character
}
\item{columns}{
character. \bold{Currently only a single column is supported.}
character.
}
\item{return.class}{
\code{NULL} (default) or character: if \code{NULL}, a list is
returned. Also supported are \code{zoo},
\code{\link{data.frame}} and \code{\link{ts_table}}.
}
\item{drop.weekends}{
logical
\item{return.class}{
\code{NULL} (default) or character: if \code{NULL}, a list is
returned. Also supported are \code{zoo},
\code{\link{data.frame}} and \code{\link{ts_table}}.
}
\item{drop.weekends}{
logical
}
\item{column.names}{
character: a format string for column names; may contain
\code{\%dir\%}, \code{\%file\%}, and \code{\%column\%}
}
\item{backend}{
character: currently, only \sQuote{\code{csv}} is fully supported
}
\item{read.fn}{
\code{NULL} or character: use \sQuote{\code{fread}}
to use \code{\link[data.table]{fread}}
from package \pkg{data.table}
}
\item{frequency}{
character: only used when \code{t.type} is
\code{POSIXct} (or guessed to be \code{POSIXct})
}
\item{column.names}{
character: a format string for column names; may contain
\code{\%dir\%}, \code{\%file\%}, and \code{\%column\%}
}
\item{backend}{
character: currently, only \code{csv} is fully supported
}
\item{read.fn}{
\code{NULL} or character: use \sQuote{\code{fread}}
to use \code{\link[data.table]{fread}}
from package \pkg{data.table}
}
\item{frequency}{
character: only used when \code{t.type} is
\code{POSIXct} (or guessed to be \code{POSIXct})
}
}
\details{
Read time-series data from csv files.
Read time-series data from \acronym{CSV} files.
}
\value{
When return.class is \code{NULL}, a list:
......@@ -76,17 +80,28 @@ read_ts_tables(file, dir, t.type = "guess",
\item{columns}{character}
\item{file.path}{character}
}
\author{
Enrico Schumann
}
\seealso{
write_ts_table
\code{\link{write_ts_table}}
}
\examples{
\dontrun{
read_ts_tables(c("table1", "table2"),
dir = "~/tsdb",
columns = "close",
return.class = "zoo")}
t1 <- ts_table(1:3, as.Date("2018-12-3") + 1:3, columns = "A")
t2 <- ts_table(4:5, as.Date("2018-12-3") + 1:2, columns = "A")
d <- tempdir() ## this is just an example.
## Actual (valuable) data should never
## be stored in a tempdir!
write_ts_table(t1, dir = d, file = "t1")
write_ts_table(t2, dir = d, file = "t2")
read_ts_tables(c("t1", "t2"),
dir = d, columns = "A",
return.class = "zoo",
column.names = "\%file\%.\%column\%")
}
......@@ -4,7 +4,7 @@
Create ts_table
}
\description{
Create \code{ts_table}
Create a \code{ts_table}.
}
\usage{
ts_table(data, timestamp, columns)
......@@ -14,21 +14,53 @@ ts_table(data, timestamp, columns)
numeric
}
\item{timestamp}{
Date or POSIXct
\code{\link{Date}} or \code{\link{POSIXct}}
}
\item{columns}{
column names
}
}
\details{
Creates a ts_table.
Timestamps must be of class Date or POSIXct (POSIXlt
is converted). A \code{tzone} attribute is dropped.
Create a time-series table (\code{ts_table}). A
\code{ts_table} is a numeric matrix, so there is
always a \code{dim} attribute. For a \code{ts_table}
\code{x}, you get the number of observations with
\code{dim(x)[1L]}.
Attached to this matrix are several attributes:
\describe{
\item{timestamp}{a vector: the numeric representation of
the timestamp}
\item{t.type}{character: the class of the original
timestamp, either \code{Date} or \code{POSIXct}}
\item{columns}{a character vector that provides the
column names}
}
There may be other attributes as well, but these three
are always present.
Timestamps must be of class \code{\link{Date}} or
\code{\link{POSIXct}} (\code{\link{POSIXlt}} is
converted). A \code{tzone} attribute is dropped.
A \code{ts_table} is not meant as a time-series
class. For most computations (plotting, calculation
of statistics, etc.), the \code{ts_table} must first
be coerced to \code{zoo}, \code{xts}, a
\code{\link{data.frame}} or a similar data
structure. Methods that perform such coercions are
responsible for converting the numeric timestamp
vector to an actual timestamp. For this, they may use
the function \code{\link{ttime}} (\sQuote{translate time}).
}
\value{
A \code{ts_table}:
a \code{ts_table}
}
\author{
Enrico Schumann
......@@ -37,5 +69,5 @@ ts_table(data, timestamp, columns)
\code{\link{as.ts_table}}
}
\examples{
ts_table(1:5, Sys.Date()-5:1, columns = "value")
ts_table(1:5, Sys.Date() - 5:1, columns = "value")
}
\name{tsdb-package}
\alias{tsdb-package}
\alias{tsdb}
\alias{ttime}
\title{
Store and Retrieve Time-Series
Terribly-Simple Database for Time Series
}
\description{
Several simple utilities that allow to handle
time-series data as CSV files.
A terribly-simple data base for numeric time series,
written purely in \R, so no external database-software is
needed. Series are stored in plain-text files (the
most-portable and enduring file type) in \acronym{CSV}
format; timestamps are encoded using \R's native numeric
representation for
\code{\link{Date}}/\code{\link{POSIXct}}, which makes them
fast to parse, but keeps them accessible with other
software. The package provides tools for saving and
updating series in this standardised format, for
retrieving and joining data, for summarising files and
directories, and for coercing series from and to other
data types (such as 'zoo' series).
}
\usage{
ttime(x, from = "datetime", to = "numeric", tz = "",
strip.attr = TRUE, format = "\%Y-\%m-\%d")
}
\arguments{
\item{x}{
numeric
}
\item{from}{
character
}
\item{to}{
character
}
\item{tz}{
character
}
\item{strip.attr}{
logical: strip attributes; in particular, timezone information
}
\item{format}{
character
}
}
\details{
\code{ttime} (`translate time') converts timestamps
between formats.
See the functions \code{\link{ts_table}} and
\code{\link{as.ts_table}} for creating a
\code{\link{ts_table}}.
See \code{\link{write_ts_table}} and
\code{\link{read_ts_tables}} for storing and loading a
\code{\link{ts_table}} (or several).
See the tutorial at
\url{https://gitlab.com/enricoschumann/tsdb/blob/master/README.org}
or
\url{https://github.com/enricoschumann/tsdb/blob/master/README.org} .
}
\author{
Enrico Schumann
}
\seealso{
\code{\link{read_ts_tables}}
}
\examples{
## see the particular functions
\code{\link{ts_table}} and \code{\link{as.ts_table}} for
creating a \code{\link{ts_table}}
\code{\link{write_ts_table}} and \code{\link{read_ts_tables}} for
storing and loading a \code{\link{ts_table}}
}
\name{ttime}
\alias{ttime}
\title{
Translate Timestamps
}
\description{
Translate a vector of timestamps.
}
\usage{
ttime(x, from = "datetime", to = "numeric", tz = "",
strip.attr = TRUE, format = "\%Y-\%m-\%d")
}
\arguments{
\item{x}{
numeric
}
\item{from}{
character
}
\item{to}{
character
}
\item{tz}{