rw.py 9.13 KB
Newer Older
1
from __future__ import print_function
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
2
# flake8: noqa
3 4 5
"""

File content::

    0: "IOASE..."
    8: version
    16: nitems (int64)
    24: 32 (position of offsets, int64)
    32: p0 (offset to json data, int64)
    40: 8-byte aligned ndarrays
    p0: n (length of json data, int64)
    p0+8: json data
    p0+8+n: EOF

"""

# magic prefix?, ascii header? See hdf5 header,
# ordereddict, endianness, todict?
21 22 23

import numpy as np

24 25 26 27 28
from ase.db.jsondb import encode, decode


# File format version; written to the header right after the 8-byte
# b'IOASE...' identifier and checked when a file is opened for appending.
VERSION = 1
N1 = 42  # block size - max number of items: 1, N1, N1*N1, N1*N1*N1, ...
29 30


31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
def align(fd):
    """Pad the file with b'#' bytes up to the next multiple of 8.

    Nothing is written when the current position is already a multiple
    of 8.  Returns the aligned position."""
    position = fd.tell()
    # Number of filler bytes needed to reach the next 8-byte boundary.
    missing = -position % 8
    if missing:
        fd.write(b'#' * missing)
    return position + missing


def writeint(fd, n, pos=None):
    """Store n as a 64 bit integer in the file.

    When pos is given, the file is first positioned there; otherwise
    the integer goes at the current position."""
    if pos is not None:
        fd.seek(pos)
    value = np.array(n, np.int64)
    value.tofile(fd)
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
46

47

48
class Writer:
    """Writer for files in the "IOASE" container format.

    Simple values are collected in the ``data`` dictionary and written
    as JSON by sync(); ndarrays are written straight to the file
    (8-byte aligned) and only their shape, dtype and offset are kept in
    the dictionary.  Each call to sync() finishes one item of the file.
    """

    def __init__(self, fd, mode='w', data=None):
        """Create writer object.

        The data dictionary holds:

        * data for type bool, int, float, complex and str
        * shape and dtype for ndarrays
        * class names for other objects

        These other objects must have a write() method and a static
        read() method.

        fd: Name of the file to open when ``data`` is None.  When
            ``data`` is given (as write() does for nested objects),
            ``fd`` is used as an already opened file object.
        mode: 'w' to create a new file or 'a' to append items to an
            existing one.  Only looked at when ``data`` is None.
        data: Dictionary to fill in; None creates a top-level writer.

        Raises ValueError for an unknown mode."""

        assert np.little_endian

        if data is None:
            if mode not in ('w', 'a'):
                raise ValueError(
                    "mode must be 'w' or 'a', got {!r}".format(mode))

            data = {}
            if mode == 'w':
                self.nitems = 0
                self.itemoffsets = 32
                self.offsets = np.array([-1], np.int64)

                fd = open(fd, 'wb')

                # Write file format identifier:
                fd.write(b'IOASE...')
                np.array([VERSION, self.nitems, self.itemoffsets],
                         np.int64).tofile(fd)
                self.offsets.tofile(fd)
            else:
                fd = open(fd, 'r+b')

                version, self.nitems, self.itemoffsets, offsets = \
                    read_header(fd)
                assert version == VERSION
                # The table of offsets has room for 1, N1, N1*N1, ...
                # entries; pad the in-memory copy to the smallest of
                # these sizes that holds the existing items.
                n = 1
                while self.nitems > n:
                    n *= N1
                padding = np.zeros(n - self.nitems, np.int64)
                self.offsets = np.concatenate((offsets, padding))
                fd.seek(0, 2)  # new data goes at the end of the file

        self.fd = fd
        self.data = data

        # Shape and dtype of array being filled:
        self.shape = (0,)
        self.dtype = None

    def add_array(self, name, shape, dtype=float, delayed_read=True):
        """Register an array that will be written in pieces with fill().

        name: Key of the array in the data dictionary.
        shape: Full shape of the array (an int means a 1-d array).
        dtype: Data type that the pieces given to fill() must have.
        delayed_read: If true, the entry is marked '_delayed' so that a
            Reader does not load the array right away.

        The previous array must have been filled completely."""
        # Check before touching the file or the data dictionary so that
        # a failed call leaves no padding or half-registered entry.
        assert self.shape[0] == 0, 'last array not done'

        if isinstance(shape, int):
            shape = (shape,)

        i = align(self.fd)
        self.data[name] = {'_type': 'numpy.ndarray',
                           'shape': shape,
                           'dtype': np.dtype(dtype).name,
                           'offset': i}

        if delayed_read:
            self.data[name]['_delayed'] = True

        self.dtype = dtype
        self.shape = shape

    def fill(self, a):
        """Write the next piece of the array started with add_array().

        ``a`` is either a block of rows (its trailing dimensions match
        those of the array) or a single row (its shape matches the
        trailing dimensions).  ``self.shape[0]`` counts the rows still
        missing."""
        assert a.dtype == self.dtype
        if a.shape[1:] == self.shape[1:]:
            assert a.shape[0] <= self.shape[0]
            self.shape = (self.shape[0] - a.shape[0],) + self.shape[1:]
        else:
            assert a.shape == self.shape[1:]
            self.shape = (self.shape[0] - 1,) + self.shape[1:]
        assert self.shape[0] >= 0

        a.tofile(self.fd)

    def sync(self):
        """Write data dictionary.

        Write bool, int, float, complex and str data, shapes and
        dtypes for ndarrays and class names for other objects.

        This finishes the current item: its offset is recorded in the
        table of offsets, the item count in the header is updated and
        a fresh, empty data dictionary is started."""

        assert self.shape[0] == 0
        i = self.fd.tell()
        s = encode(self.data).encode()
        writeint(self.fd, len(s))
        self.fd.write(s)

        n = len(self.offsets)
        if self.nitems >= n:
            # Table of offsets is full: write a table that is N1 times
            # larger at the end of the file and point the header at it.
            offsets = np.zeros(n * N1, np.int64)
            offsets[:n] = self.offsets
            self.itemoffsets = align(self.fd)
            offsets.tofile(self.fd)
            writeint(self.fd, self.itemoffsets, 24)
            self.offsets = offsets

        self.offsets[self.nitems] = i
        writeint(self.fd, i, self.itemoffsets + self.nitems * 8)
        self.nitems += 1
        writeint(self.fd, self.nitems, 16)
        self.fd.flush()
        self.fd.seek(0, 2)  # end of file
        self.data = {}

    def write(self, **kwargs):
        """Write data.

        Use::

            writer.write(n=7, s='abc', a=np.zeros(3), density=density).

        Plain Python values are stored in the data dictionary, ndarrays
        are written to the file immediately, and any other object is
        asked to write itself through its write(writer) method.
        """

        for name, value in kwargs.items():
            if isinstance(value, (bool, int, float, complex,
                                  dict, list, tuple, str)):
                self.data[name] = value
            elif isinstance(value, np.ndarray):
                self.add_array(name, value.shape, value.dtype,
                               delayed_read=False)
                self.fill(value)
            else:
                self.data[name] = {'_type':
                                   value.__module__ + '.' +
                                   value.__class__.__name__}
                # Nested writer: shares the file, fills the sub-dictionary.
                writer = Writer(self.fd, data=self.data[name])
                value.write(writer)

    def close(self):
        """Write the current item with sync() and close the file."""
        self.sync()
        self.fd.close()
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
184 185


186 187 188 189 190 191 192
def read_header(fd):
    """Read the file header and the table of item offsets.

    Checks the b'IOASE...' identifier at the start of the file and
    returns the tuple (version, nitems, itemoffsets, offsets) where
    offsets is an int64 array with one entry per item."""
    fd.seek(0)
    magic = fd.read(8)
    assert magic == b'IOASE...'
    header = np.fromfile(fd, np.int64, 3)
    version, nitems, itemoffsets = header
    # The table of offsets lives wherever the header says it does.
    fd.seek(itemoffsets)
    offsets = np.fromfile(fd, np.int64, nitems)
    return version, nitems, itemoffsets, offsets
193

Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
194

195
class Reader:
196
    def __init__(self, fd, item=0, data=None):
197 198 199 200
        """Create hierarchy of readers.

        Store data as attributes for easy access and to allow
        tab-completion."""
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
201

202 203 204 205
        assert np.little_endian

        if isinstance(fd, str):
            fd = open(fd, 'rb')
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
206

207
        self.fd = fd
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
208

209 210 211 212 213
        if data is None:
            self.version, self.nitems, self.itemoffsets, self.offsets = \
                read_header(fd)
            data = self._read_data(item)

214
        for name, value in data.items():
215
            if isinstance(value, dict) and '_type' in value:
216
                if value['_type'] == 'numpy.ndarray':
217 218 219 220 221 222 223
                    read_now = '_delayed' not in value
                    value = NDArrayReader(fd,
                                          value['shape'],
                                          np.dtype(value['dtype']),
                                          value['offset'])
                    if read_now:
                        value = value.read()
224
                else:
225
                    value = Reader(self.fd, data=value)
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
226

227
            data[name] = value
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
228

229
        self.data = data
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
230

231 232
    def __dir__(self):
        return self.data.keys()
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
233

234 235 236 237 238
    def __getattr__(self, attr):
        value = self.data[attr]
        if isinstance(value, NDArrayReader):
            return value.read()
        return value
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
239

240 241 242 243
    def proxy(self, name):
        value = self.data[name]
        assert isinstance(value, NDArrayReader)
        return value
244

245 246
    def __len__(self):
        return self.nitems
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
247

248 249 250 251 252
    def _read_data(self, item):
        self.fd.seek(self.offsets[item])
        size = np.fromfile(self.fd, np.int64, 1)[0]
        data = decode(self.fd.read(size).decode())
        return data
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
253

254 255 256
    def __getitem__(self, i):
        data = self._read_data(i)
        return Reader(self.fd, data=data)
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
257

258

259 260
# Module-level aliases for the Reader and Writer classes.
read = Reader
write = Writer
261

262 263 264 265 266 267 268

class NDArrayReader:
    """Array stored in a file that is read only when it is indexed.

    Indexing works along the first axis with integers and slices
    (including negative indices and steps) and returns ndarrays."""

    def __init__(self, fd, shape, dtype, offset):
        """Describe an array stored in ``fd``.

        fd: Opened file object containing the raw array data.
        shape: Shape of the array (any sequence of ints).
        dtype: numpy dtype object of the array.
        offset: Position in the file of the first byte of the array."""
        self.fd = fd
        self.shape = tuple(shape)
        self.dtype = dtype
        self.offset = offset

        self.ndim = len(self.shape)
        self.itemsize = dtype.itemsize
        # Plain Python int: no overflow for big arrays.
        self.size = int(np.prod(self.shape, dtype=np.int64))
        self.nbytes = self.size * self.itemsize

    def __len__(self):
        """Length of the first axis."""
        return self.shape[0]

    def read(self):
        """Read and return the whole array."""
        return self[:]

    def __getitem__(self, i):
        """Read the rows selected by an integer or a slice.

        Only the contiguous range of rows spanned by the selection is
        read from the file.  Raises IndexError for an integer index
        outside the array."""
        nrows = len(self)
        rowshape = self.shape[1:]

        if isinstance(i, (int, np.integer)):
            index = int(i)
            if index < 0:
                index += nrows
            if not 0 <= index < nrows:
                raise IndexError(
                    'index {} is out of bounds for axis 0 with size {}'
                    .format(i, nrows))
            return self[index:index + 1][0]

        start, stop, step = i.indices(nrows)
        rows = range(start, stop, step)
        if len(rows) == 0:
            # Also covers zero-length arrays: nothing to read.
            return np.empty((0,) + rowshape, self.dtype)

        # Number of elements in one row; nrows > 0 here.
        rowsize = self.size // nrows
        if rowsize == 0:
            return np.empty((len(rows),) + rowshape, self.dtype)

        # With a negative step the selection runs backwards, so the
        # rows to read from the file go from rows[-1] up to rows[0].
        first = min(rows[0], rows[-1])
        last = max(rows[0], rows[-1])
        self.fd.seek(self.offset + first * rowsize * self.itemsize)
        a = np.fromfile(self.fd, self.dtype, (last - first + 1) * rowsize)
        a.shape = (last - first + 1,) + rowshape
        if step != 1:
            # For step < 0 this starts at the last row read (rows[0]).
            return a[::step].copy()
        return a

jensj's avatar
jensj committed
294 295 296 297 298 299 300

def main():
    """Print the value of an expression evaluated on a file.

    Command line arguments: the name of the file and a Python
    expression in which ``r`` is the Reader for that file, for
    example ``r.y`` or ``r[2].z``.  Module-level names such as ``np``
    can be used in the expression as well."""
    import sys

    args = sys.argv[1:]
    if len(args) != 2:
        raise SystemExit('usage: rw.py <filename> <expression>')
    r = Reader(args[0])
    # NOTE: eval() runs arbitrary code - the expression must come from a
    # trusted user.  (exec('x = ...') cannot create the local variable x
    # inside a function on Python 3, so the value is taken from eval().)
    x = eval(args[1], globals(), {'r': r})
    # csv for 2d ...
    print(x)
Jens Jørgen Mortensen's avatar
Jens Jørgen Mortensen committed
301 302


303 304 305 306
if __name__ == '__main__':
    # Small self-test: write a file with three items, read it back,
    # append a fourth item and read again.

    class A:
        """Example of an object that knows how to write and read itself."""

        def write(self, writer):
            writer.write(x=np.ones((2, 3)))

        @staticmethod
        def read(reader):
            obj = A()
            obj.x = reader.x
            return obj

    filename = 'a.ioase'

    # Items 0, 1 and 2:
    w = Writer(filename)
    w.write(a=A(), y=9)
    w.write(s='abc')
    w.sync()
    w.write(s='abc2')
    w.sync()
    w.write(s='abc3', z=np.ones(7, int))
    w.close()
    print(w.data)

    r = Reader(filename)
    print(r.y, r.s)
    print(A.read(r.a).x)
    print(r.a.x)
    for index in (1, 2):
        print(r[index].s)
    print(r[2].z)

    # Item 3, appended to the existing file; 'psi' is filled in pieces:
    w = Writer(filename, 'a')
    print(w.nitems, w.offsets)
    w.write(d={'h': [1, 'asdf']})
    w.add_array('psi', (4, 3))
    row = np.ones((1, 3))
    w.fill(row)
    w.fill(row * 2)
    w.fill(np.ones((2, 3)) * 3)
    w.close()

    print(Reader(filename, 3).d)
    print(Reader(filename)[2].z)
    print(Reader(filename, 3).proxy('psi')[0:3])