serialize.c 83.5 KB
Newer Older
Radford Neal's avatar
Radford Neal committed
1
/*
2
 *  pqR : A pretty quick version of R
3
 *  Copyright (C) 2013, 2014, 2015, 2016, 2017 by Radford M. Neal
4 5
 *
 *  Based on R : A Computer Language for Statistical Data Analysis
6
 *  Copyright (C) 1995--2012  The R Core Team
Radford Neal's avatar
Radford Neal committed
7
 *
8 9 10
 *  The changes in pqR from R-2.15.0 distributed by the R Core Team are
 *  documented in the NEWS and MODS files in the top-level source directory.
 *
Radford Neal's avatar
Radford Neal committed
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, a copy is available at
 *  http://www.r-project.org/Licenses/
 */

/* <UTF8> byte-level access is only to compare with chars <= 0x7F */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#define NEED_CONNECTION_PSTREAMS
33
#define USE_FAST_PROTECT_MACROS
Radford Neal's avatar
Radford Neal committed
34
#define R_USE_SIGNALS 1
Radford Neal's avatar
Radford Neal committed
35 36 37 38 39 40
#include <Defn.h>
#include <Rmath.h>
#include <Fileio.h>
#include <Rversion.h>
#include <R_ext/RS.h>           /* for CallocCharBuf, Free */
#include <errno.h>
Radford Neal's avatar
Radford Neal committed
41
#include <ctype.h>		/* for isspace */
Radford Neal's avatar
Radford Neal committed
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101

/* From time to time changes in R, such as the addition of a new SXP,
 * may require changes in the save file format.  Here are some
 * guidelines on handling format changes:
 *
 *    Starting with 1.4 there is a version number associated with save
 *    file formats.  This version number should be incremented when
 *    the format is changed so older versions of R can recognize and
 *    reject the new format with a meaningful error message.
 *
 *    R should remain able to write older workspace formats.  An error
 *    should be signaled if the contents to be saved is not compatible
 *    with the requested format.
 *
 *    To allow older versions of R to give useful error messages, the
 *    header now contains the version of R that wrote the workspace
 *    and the oldest version that can read the workspace.  These
 *    versions are stored as an integer packed by the R_Version macro
 *    from Rversion.h.  Some workspace formats may only exist
 *    temporarily in the development stage.  If readers are not
 *    provided in a release version, then these should specify the
 *    oldest reader R version as -1.
 */


/* ----- V e r s i o n -- T w o -- S a v e / R e s t o r e ----- */

/* Adapted from Chris Young and Ross Ihaka's Version One by Luke
   Tierney.  Copyright Assigned to the R Project.

   The approach used here uses a single pass over the node tree to be
   serialized.  Sharing of reference objects is preserved, but sharing
   among other objects is ignored.  The first time a reference object
   is encountered it is entered in a hash table; the value stored with
   the object is the index in the sequence of reference objects (1 for
   first reference object, 2 for second, etc.).  When an object is
   seen again, i.e. it is already in the hash table, a reference
   marker along with the index is written out.  The unserialize code
   does not know in advance how many reference objects it will see, so
   it starts with an initial array of some reasonable size and doubles
   it each time space runs out.  Reference objects are entered as they
   are encountered.

   This means the serialize and unserialize code needs to agree on
   what is a reference object.  Making a non-reference object into
   a reference object requires a version change in the format.  An
   alternate design would be to precede each reference object with a
   marker that says the next thing is a possibly shared object and
   needs to be entered into the reference table.

   Adding new SXP types is easy, whether they are reference objects or
   not.  The unserialize code will signal an error if it sees a type
   value it does not know.  It is of course better to increment the
   serialization format number when a new SXP is added, but if that
   SXP is unlikely to be saved by users then it may be simpler to keep
   the version number and let the error handling code deal with it.

   The output format for dotted pairs writes the ATTRIB value first
   rather than last.  This allows CDR's to be processed by iterative
   tail calls to avoid recursion stack overflows when processing long
102 103
   lists.  Both the writing code and the reading code take advantage 
   of this.
Radford Neal's avatar
Radford Neal committed
104 105 106 107

   CHARSXPs are now handled in a way that preserves both embedded null
   characters and NA_STRING values.

108 109 110
   The "XDR" save format no longer uses the XDR routines, which are
   slow and cumbersome.  Conversion to big-endian representation is
   accomplished with routines in this module.
Radford Neal's avatar
Radford Neal committed
111 112 113 114 115

   The output format packs the type flag and other flags into a single
   integer.  This produces more compact output for code; it has little
   effect on data.

Radford Neal's avatar
Radford Neal committed
116
   Environments recognized as package or namespace environments are
Radford Neal's avatar
Radford Neal committed
117
   not saved directly. Instead, a STRSXP is saved that is then used to
Radford Neal's avatar
Radford Neal committed
118
   attempt to find the package/namespace when unserialized.  The
Radford Neal's avatar
Radford Neal committed
119 120 121 122 123 124 125 126 127 128 129 130 131
   exact mechanism for choosing the name and finding the package/name
   space from the name still has to be developed, but the
   serialization format should be able to accommodate any reasonable
   mechanism.

   The mechanism assumes that user code supplies one routine for
   handling single characters and one for handling an array of bytes.
   Higher level interfaces that serialize to/from a FILE * pointer or
   an Rconnection pointer are provided in this file; others can be
   built easily.

   A mechanism is provided to allow special handling of non-system
   reference objects (all weak references and external pointers, and
Radford Neal's avatar
Radford Neal committed
132
   all environments other than package environments, namespace
Radford Neal's avatar
Radford Neal committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
   environments, and the global environment).  The hook function
   consists of a function pointer and a data value.  The serialization
   function pointer is called with the reference object and the data
   value as arguments.  It should return R_NilValue for standard
   handling and an STRSXP for special handling.  In an STRSXP is
   returned, then a special handing mark is written followed by the
   strings in the STRSXP (attributes are ignored).  On unserializing,
   any specially marked entry causes a call to the hook function with
   the reconstructed STRSXP and data value as arguments.  This should
   return the value to use for the reference object.  A reasonable
   convention on how to use this mechanism is neded, but again the
   format should be compatible with any reasonable convention.

   Eventually it may be useful to use these hooks to allow objects
   with a class to have a class-specific serialization mechanism.  The
   serialization format should support this.  It is trickier than in
   Java and other reference based languages where creation and
   initialization can be separated--we don't really have that option
   at the R level.  */


154
/* FORWARD DECLARATIONS. */
Radford Neal's avatar
Radford Neal committed
155

Radford Neal's avatar
Radford Neal committed
156 157 158 159 160 161 162 163 164 165 166 167
struct outpar { 
    R_outpstream_t stream;
    SEXP ref_table;
    int nosharing;
    char *buf;
};

struct inpar {
    R_inpstream_t stream;
    SEXP ref_table;
    char *buf;
};
168 169 170 171 172 173 174 175 176 177

static void OutStringVec (struct outpar *par, SEXP s);
static void WriteItem  (struct outpar *par, SEXP s);
static SEXP InStringVec (struct inpar *par);
static SEXP ReadItem (struct inpar *par);
static void WriteBC (struct outpar *par, SEXP s);
static SEXP ReadBC (struct inpar *par);


/* CONSTANTS. */
Radford Neal's avatar
Radford Neal committed
178 179 180 181 182 183

/* The default version used when a stream Init function is called with
   version = 0 */

static const int R_DefaultSerializeVersion = 2;

184 185 186 187

/* UTILITY FUNCTIONS. */

/* An assert function which doesn't crash the program.
Radford Neal's avatar
Radford Neal committed
188 189 190 191 192 193 194 195 196 197 198 199 200 201
 * Something like this might be useful in an R header file
 */

#ifdef NDEBUG
#define R_assert(e) ((void) 0)
#else
/* The line below requires an ANSI C preprocessor (stringify operator) */
#define R_assert(e) ((e) ? (void) 0 : error("assertion '%s' failed: file '%s', line %d\n", #e, __FILE__, __LINE__))
#endif /* NDEBUG */

/* Rsnprintf: like snprintf, but guaranteed to null-terminate. */
static int Rsnprintf(char *buf, int size, const char *format, ...)
{
    int val;
202
    va_list ap;
Radford Neal's avatar
Radford Neal committed
203
    va_start(ap, format);
Radford Neal's avatar
Radford Neal committed
204
    /* On Windows this uses the non-C99 MSVCRT.dll version, which is OK */
Radford Neal's avatar
Radford Neal committed
205 206 207 208 209 210 211
    val = vsnprintf(buf, size, format, ap);
    buf[size-1] = '\0';
    va_end(ap);
    return val;
}


212 213
/* HANDLE XDR ENCODE/DECODE FOR BOTH BIG AND LITTLE ENDIAN MACHINES.

214 215 216
   The "XDR" format is big-endian, but most processors are now little-endian. 
   Handles both below, but may not work for more bizarre mixed-endian machines.
*/
217 218 219

static inline void encode_integer(int i, void *buf)
{
220 221
#   if __GNUC__ && defined(__BYTE_ORDER__)
#       if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
222 223 224 225 226 227 228 229 230
            i = __builtin_bswap32(i);
#       endif
        memcpy(buf,&i,sizeof(int));
#   else
        ((signed char *)buf)[0] = i >> 24;
        ((unsigned char *)buf)[1] = (i >> 16) & 0xff;
        ((unsigned char *)buf)[2] = (i >> 8) & 0xff;
        ((unsigned char *)buf)[3] = i & 0xff;
#   endif
231 232 233 234 235 236 237
}

static inline void encode_double(double d, void *buf)
{
    uint64_t u;
    memcpy(&u,&d,8);

238 239
#   if __GNUC__ && defined(__FLOAT_WORD_ORDER__)
#       if __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__
240 241 242 243 244 245 246 247 248 249 250 251 252
            u = __builtin_bswap64(u);
#       endif
        memcpy(buf,&u,sizeof(double));
#   else
        ((unsigned char *)buf)[0] = (u >> 56) & 0xff;
        ((unsigned char *)buf)[1] = (u >> 48) & 0xff;
        ((unsigned char *)buf)[2] = (u >> 40) & 0xff;
        ((unsigned char *)buf)[3] = (u >> 32) & 0xff;
        ((unsigned char *)buf)[4] = (u >> 24) & 0xff;
        ((unsigned char *)buf)[5] = (u >> 16) & 0xff;
        ((unsigned char *)buf)[6] = (u >> 8) & 0xff;
        ((unsigned char *)buf)[7] = u & 0xff;
#   endif
253 254 255 256
}

static inline int decode_integer(void *buf)
{
257 258
    int i;

259
#   if __GNUC__ && defined(__BYTE_ORDER__)
260
        memcpy(&i,buf,sizeof(int));
261
#       if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
262 263 264 265 266 267 268 269 270 271
            i = __builtin_bswap32(i);
#       endif
#   else
        i = ((int)(((signed char *)buf)[0]) << 24) |
            ((int)(((unsigned char *)buf)[1]) << 16) |
            ((int)(((unsigned char *)buf)[2]) << 8) |
            (int)(((unsigned char *)buf)[3]);
#   endif

    return i;
272 273 274 275
}

static inline double decode_double(void *buf)
{
276 277
    uint64_t u;

278
#   if __GNUC__ && defined(__FLOAT_WORD_ORDER__)
279
        memcpy(&u,buf,sizeof(double));
280
#       if __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__
281 282 283 284 285 286 287 288 289 290 291 292
            u = __builtin_bswap64(u);
#       endif
#   else
        u = ((uint64_t)(((unsigned char *)buf)[0]) << 56) |
            ((uint64_t)(((unsigned char *)buf)[1]) << 48) |
            ((uint64_t)(((unsigned char *)buf)[2]) << 40) |
            ((uint64_t)(((unsigned char *)buf)[3]) << 32) |
            ((uint64_t)(((unsigned char *)buf)[4]) << 24) |
            ((uint64_t)(((unsigned char *)buf)[5]) << 16) |
            ((uint64_t)(((unsigned char *)buf)[6]) << 8) |
            (uint64_t)(((unsigned char *)buf)[7]);
#   endif
293

294
    double d;
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
    memcpy(&d,&u,8);
    return d;
}

static attribute_noinline void encode_doubles (double *d, int n, void *buf)
{
    double *p = (double *) buf;
    int i;
    for (i = 0; i < n; i++) {
        encode_double (*d, (void *)p);
        p += 1;
        d += 1;
    }
}

static attribute_noinline void decode_doubles (double *d, int n, void *buf)
{
    double *p = (double *) buf;
    int i;
    for (i = 0; i < n; i++) {
        *d = decode_double ((void *)p);
        p += 1;
        d += 1;
    }
}


Radford Neal's avatar
Radford Neal committed
322
/* SIZE OF BUFFERS USED BY INPUT/OUTPUT ROUTINES. */
323

Radford Neal's avatar
Radford Neal committed
324 325
#define CHUNK_SIZE 1024
#define CBUF_SIZE (CHUNK_SIZE * sizeof (Rcomplex))  /* Rcomplex is biggest */
Radford Neal's avatar
Radford Neal committed
326

327 328 329

/* BASIC OUTPUT ROUTINES. */

Radford Neal's avatar
Radford Neal committed
330
static attribute_noinline void OutInteger (struct outpar *par, int i)
Radford Neal's avatar
Radford Neal committed
331
{
Radford Neal's avatar
Radford Neal committed
332 333 334
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

Radford Neal's avatar
Radford Neal committed
335 336 337
    switch (stream->type) {
    case R_pstream_ascii_format:
	if (i == NA_INTEGER)
Radford Neal's avatar
Radford Neal committed
338
	    Rsnprintf(buf, CBUF_SIZE, "NA\n");
Radford Neal's avatar
Radford Neal committed
339
	else
Radford Neal's avatar
Radford Neal committed
340
	    Rsnprintf(buf, CBUF_SIZE, "%d\n", i);
Radford Neal's avatar
Radford Neal committed
341 342 343
	stream->OutBytes(stream, buf, strlen(buf));
	break;
    case R_pstream_binary_format:
344
	stream->OutBytes(stream, &i, sizeof (int));
Radford Neal's avatar
Radford Neal committed
345 346
	break;
    case R_pstream_xdr_format:
347 348
	encode_integer(i, buf);
	stream->OutBytes(stream, buf, sizeof (int));
Radford Neal's avatar
Radford Neal committed
349 350 351 352 353 354
	break;
    default:
	error(_("unknown or inappropriate output format"));
    }
}

Radford Neal's avatar
Radford Neal committed
355
static attribute_noinline void OutReal (struct outpar *par, double d)
Radford Neal's avatar
Radford Neal committed
356
{
Radford Neal's avatar
Radford Neal committed
357 358 359
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

Radford Neal's avatar
Radford Neal committed
360 361 362 363
    switch (stream->type) {
    case R_pstream_ascii_format:
	if (! R_FINITE(d)) {
	    if (ISNAN(d))
Radford Neal's avatar
Radford Neal committed
364
		Rsnprintf(buf, CBUF_SIZE, "NA\n");
Radford Neal's avatar
Radford Neal committed
365
	    else if (d < 0)
Radford Neal's avatar
Radford Neal committed
366
		Rsnprintf(buf, CBUF_SIZE, "-Inf\n");
Radford Neal's avatar
Radford Neal committed
367
	    else
Radford Neal's avatar
Radford Neal committed
368
		Rsnprintf(buf, CBUF_SIZE, "Inf\n");
Radford Neal's avatar
Radford Neal committed
369 370 371
	}
	else
	    /* 16: full precision; 17 gives 999, 000 &c */
Radford Neal's avatar
Radford Neal committed
372
	    Rsnprintf(buf, CBUF_SIZE, "%.16g\n", d);
Radford Neal's avatar
Radford Neal committed
373 374 375
	stream->OutBytes(stream, buf, strlen(buf));
	break;
    case R_pstream_binary_format:
376
	stream->OutBytes(stream, &d, sizeof (double));
Radford Neal's avatar
Radford Neal committed
377 378
	break;
    case R_pstream_xdr_format:
379 380
	encode_double (d, buf);
	stream->OutBytes(stream, buf, sizeof (double));
Radford Neal's avatar
Radford Neal committed
381 382 383 384 385 386
	break;
    default:
	error(_("unknown or inappropriate output format"));
    }
}

Radford Neal's avatar
Radford Neal committed
387
static void OutComplex (struct outpar *par, Rcomplex c)
Radford Neal's avatar
Radford Neal committed
388
{
Radford Neal's avatar
Radford Neal committed
389 390
    OutReal(par, c.r);
    OutReal(par, c.i);
Radford Neal's avatar
Radford Neal committed
391 392
}

Radford Neal's avatar
Radford Neal committed
393
static attribute_noinline void OutByte (struct outpar *par, Rbyte i)
Radford Neal's avatar
Radford Neal committed
394
{
Radford Neal's avatar
Radford Neal committed
395 396 397
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

Radford Neal's avatar
Radford Neal committed
398 399
    switch (stream->type) {
    case R_pstream_ascii_format:
Radford Neal's avatar
Radford Neal committed
400
	Rsnprintf(buf, CBUF_SIZE, "%02x\n", i);
Radford Neal's avatar
Radford Neal committed
401 402 403 404 405 406 407 408 409 410 411
	stream->OutBytes(stream, buf, strlen(buf));
	break;
    case R_pstream_binary_format:
    case R_pstream_xdr_format:
	stream->OutBytes(stream, &i, 1);
	break;
    default:
	error(_("unknown or inappropriate output format"));
    }
}

Radford Neal's avatar
Radford Neal committed
412
static attribute_noinline void OutString (struct outpar *par, const char *s,
413
                                          int length)
Radford Neal's avatar
Radford Neal committed
414
{
Radford Neal's avatar
Radford Neal committed
415 416 417 418
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

    OutInteger (par, length);
419

Radford Neal's avatar
Radford Neal committed
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
    if (stream->type == R_pstream_ascii_format) {
	int i;
	for (i = 0; i < length; i++) {
	    switch(s[i]) {
	    case '\n': sprintf(buf, "\\n");  break;
	    case '\t': sprintf(buf, "\\t");  break;
	    case '\v': sprintf(buf, "\\v");  break;
	    case '\b': sprintf(buf, "\\b");  break;
	    case '\r': sprintf(buf, "\\r");  break;
	    case '\f': sprintf(buf, "\\f");  break;
	    case '\a': sprintf(buf, "\\a");  break;
	    case '\\': sprintf(buf, "\\\\"); break;
	    case '\?': sprintf(buf, "\\?");  break;
	    case '\'': sprintf(buf, "\\'");  break;
	    case '\"': sprintf(buf, "\\\""); break;
	    default  :
		/* cannot print char in octal mode -> cast to unsigned
		   char first */
		/* actually, since s is signed char and '\?' == 127
		   is handled above, s[i] > 126 can't happen, but
		   I'm superstitious...  -pd */
		if (s[i] <= 32 || s[i] > 126)
		    sprintf(buf, "\\%03o", (unsigned char) s[i]);
		else
		    sprintf(buf, "%c", s[i]);
	    }
	    stream->OutBytes(stream, buf, strlen(buf));
	}
	stream->OutChar(stream, '\n');
    }
    else
	stream->OutBytes(stream, (void *)s, length); /* FIXME: is this case right? */
}


455
/* BASIC INPUT ROUTINES. */
Radford Neal's avatar
Radford Neal committed
456

Radford Neal's avatar
Radford Neal committed
457
static void InWord (R_inpstream_t stream, char *word, int size)
Radford Neal's avatar
Radford Neal committed
458 459 460 461 462 463 464 465 466
{
    int c, i;
    i = 0;
    do {
	c = stream->InChar(stream);
	if (c == EOF)
	    error(_("read error"));
    } while (isspace(c));
    while (! isspace(c) && i < size) {
Radford Neal's avatar
Radford Neal committed
467
	word[i++] = c;
Radford Neal's avatar
Radford Neal committed
468 469 470 471
	c = stream->InChar(stream);
    }
    if (i == size)
	error(_("read error"));
Radford Neal's avatar
Radford Neal committed
472
    word[i] = 0;
Radford Neal's avatar
Radford Neal committed
473 474
}

Radford Neal's avatar
Radford Neal committed
475 476
static char word[100];  /* used in InInteger and InReal */

477
static attribute_noinline int InInteger (struct inpar *par)
Radford Neal's avatar
Radford Neal committed
478
{
Radford Neal's avatar
Radford Neal committed
479 480 481
    R_inpstream_t stream = par->stream;
    char *buf = par->buf;

Radford Neal's avatar
Radford Neal committed
482 483 484
    int i;

    switch (stream->type) {
Radford Neal's avatar
Radford Neal committed
485 486 487 488
    case R_pstream_ascii_format: ;
	InWord(stream, buf, CBUF_SIZE);
	sscanf(buf, "%s", word);
	if (strcmp(word, "NA") == 0)
Radford Neal's avatar
Radford Neal committed
489 490
	    return NA_INTEGER;
	else
Radford Neal's avatar
Radford Neal committed
491
	    sscanf(word, "%d", &i);
Radford Neal's avatar
Radford Neal committed
492 493
	return i;
    case R_pstream_binary_format:
494
	stream->InBytes(stream, &i, sizeof (int));
Radford Neal's avatar
Radford Neal committed
495 496
	return i;
    case R_pstream_xdr_format:
497 498
	stream->InBytes(stream, buf, sizeof (int));
	return decode_integer(buf);
Radford Neal's avatar
Radford Neal committed
499 500 501 502 503
    default:
	return NA_INTEGER;
    }
}

504
static attribute_noinline double InReal (struct inpar *par)
Radford Neal's avatar
Radford Neal committed
505
{
Radford Neal's avatar
Radford Neal committed
506 507 508
    R_inpstream_t stream = par->stream;
    char *buf = par->buf;

Radford Neal's avatar
Radford Neal committed
509 510 511
    double d;

    switch (stream->type) {
Radford Neal's avatar
Radford Neal committed
512 513 514 515
    case R_pstream_ascii_format: ;
	InWord(stream, buf, CBUF_SIZE);
	sscanf(buf, "%s", word);
	if (strcmp(word, "NA") == 0)
Radford Neal's avatar
Radford Neal committed
516
	    return NA_REAL;
Radford Neal's avatar
Radford Neal committed
517
	else if (strcmp(word, "Inf") == 0)
Radford Neal's avatar
Radford Neal committed
518
	    return R_PosInf;
Radford Neal's avatar
Radford Neal committed
519
	else if (strcmp(word, "-Inf") == 0)
Radford Neal's avatar
Radford Neal committed
520 521
	    return R_NegInf;
	else
Radford Neal's avatar
Radford Neal committed
522
	    sscanf(word, "%lg", &d);
Radford Neal's avatar
Radford Neal committed
523 524
	return d;
    case R_pstream_binary_format:
525
	stream->InBytes(stream, &d, sizeof (double));
Radford Neal's avatar
Radford Neal committed
526 527
	return d;
    case R_pstream_xdr_format:
528 529
	stream->InBytes(stream, buf, sizeof (double));
	return decode_double (buf);
Radford Neal's avatar
Radford Neal committed
530 531 532 533 534
    default:
	return NA_REAL;
    }
}

Radford Neal's avatar
Radford Neal committed
535
static Rcomplex InComplex (struct inpar *par)
Radford Neal's avatar
Radford Neal committed
536 537
{
    Rcomplex c;
Radford Neal's avatar
Radford Neal committed
538 539
    c.r = InReal(par);
    c.i = InReal(par);
Radford Neal's avatar
Radford Neal committed
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
    return c;
}

/* These utilities for reading characters with an unget option are
   defined so the code in InString can match the code in
   saveload.c:InStringAscii--that way it is easier to match changes in
   one to the other. */
typedef struct R_instring_stream_st {
    int last;
    R_inpstream_t stream;
} *R_instring_stream_t;

static void InitInStringStream(R_instring_stream_t s, R_inpstream_t stream)
{
    s->last = EOF;
    s->stream = stream;
}

558
static R_INLINE int GetChar(R_instring_stream_t s)
Radford Neal's avatar
Radford Neal committed
559 560 561 562 563 564 565 566 567 568
{
    int c;
    if (s->last != EOF) {
	c = s->last;
	s->last = EOF;
    }
    else c = s->stream->InChar(s->stream);
    return c;
}

569
static R_INLINE void UngetChar(R_instring_stream_t s, int c)
Radford Neal's avatar
Radford Neal committed
570 571 572 573 574
{
    s->last = c;
}


575
static attribute_noinline void InString (R_inpstream_t stream, char *buf, int length)
Radford Neal's avatar
Radford Neal committed
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
{
    if (stream->type == R_pstream_ascii_format) {
	if (length > 0) {
	    int c, d, i, j;
	    struct R_instring_stream_st iss;

	    InitInStringStream(&iss, stream);
	    while(isspace(c = GetChar(&iss)))
		;
	    UngetChar(&iss, c);
	    for (i = 0; i < length; i++) {
		if ((c =  GetChar(&iss)) == '\\') {
		    switch(c = GetChar(&iss)) {
		    case 'n' : buf[i] = '\n'; break;
		    case 't' : buf[i] = '\t'; break;
		    case 'v' : buf[i] = '\v'; break;
		    case 'b' : buf[i] = '\b'; break;
		    case 'r' : buf[i] = '\r'; break;
		    case 'f' : buf[i] = '\f'; break;
		    case 'a' : buf[i] = '\a'; break;
		    case '\\': buf[i] = '\\'; break;
		    case '?' : buf[i] = '\?'; break;
		    case '\'': buf[i] = '\''; break;
		    case '\"': buf[i] = '\"'; break; /* closing " for emacs */
		    case '0': case '1': case '2': case '3':
		    case '4': case '5': case '6': case '7':
			d = 0; j = 0;
			while('0' <= c && c < '8' && j < 3) {
			    d = d * 8 + (c - '0');
			    c = GetChar(&iss);
			    j++;
			}
			buf[i] = d;
			UngetChar(&iss, c);
			break;
		    default  : buf[i] = c;
		    }
		}
		else buf[i] = c;
	    }
	}
    }
Radford Neal's avatar
Radford Neal committed
618
    else  /* this limits the string length: used for CHARSXPs */
Radford Neal's avatar
Radford Neal committed
619 620 621 622 623 624 625 626 627 628 629 630 631
	stream->InBytes(stream, buf, length);
}


/*
 * Format Header Reading and Writing
 *
 * The header starts with one of three characters, A for ascii, B for
 * binary, or X for xdr.
 */

static void OutFormat(R_outpstream_t stream)
{
Radford Neal's avatar
Radford Neal committed
632
/*    if (stream->type == R_pstream_binary_format) {
Radford Neal's avatar
Radford Neal committed
633 634
	warning(_("binary format is deprecated; using xdr instead"));
	stream->type = R_pstream_xdr_format;
Radford Neal's avatar
Radford Neal committed
635
	} */
Radford Neal's avatar
Radford Neal committed
636 637 638 639 640 641 642 643 644 645 646 647
    switch (stream->type) {
    case R_pstream_ascii_format:  stream->OutBytes(stream, "A\n", 2); break;
    case R_pstream_binary_format: stream->OutBytes(stream, "B\n", 2); break;
    case R_pstream_xdr_format:    stream->OutBytes(stream, "X\n", 2); break;
    case R_pstream_any_format:
	error(_("must specify ascii, binary, or xdr format"));
    default: error(_("unknown output format"));
    }
}

static void InFormat(R_inpstream_t stream)
{
Radford Neal's avatar
Radford Neal committed
648
    char buf[2];
Radford Neal's avatar
Radford Neal committed
649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
    R_pstream_format_t type;
    stream->InBytes(stream, buf, 2);
    switch (buf[0]) {
    case 'A': type = R_pstream_ascii_format; break;
    case 'B': type = R_pstream_binary_format; break;
    case 'X': type = R_pstream_xdr_format; break;
    case '\n':
	/* GROSS HACK: ASCII unserialize may leave a trailing newline
	   in the stream.  If the stream contains a second
	   serialization, then a second unserialize will fail if such
	   a newline is present.  The right fix is to make sure
	   unserialize consumes exactly what serialize produces.  But
	   this seems hard because of the current use of whitespace
	   skipping in unserialize.  So a temporary hack to cure the
	   symptom is to deal with a possible leading newline.  I
	   don't think more than one is possible, but I'm not sure.
	   LT */
	if (buf[1] == 'A') {
	    type = R_pstream_ascii_format;
	    stream->InBytes(stream, buf, 1);
	    break;
	}
    default:
	error(_("unknown input format"));
    }
    if (stream->type == R_pstream_any_format)
	stream->type = type;
    else if (type != stream->type)
	error(_("input format does not match specified format"));
}


681
/* HASH TABLE FUNCTIONS.
Radford Neal's avatar
Radford Neal committed
682

683 684 685 686 687
   Hashing functions for hashing reference objects during writing.
   Objects are entered, and the order in which they are encountered is
   recorded.  HashGet returns this number, a positive integer, if the
   object was seen before, and zero if not.  A fixed hash table size
   is used, which seems adequate for now. 
Radford Neal's avatar
Radford Neal committed
688

689 690 691 692
   The hash table is a VECSXP, with count in TRUELENGTH, and buckets
   that are chains of VECSXP nodes of length 2, with value in TRUELENGTH
   and key and link as the two elements.
*/
Radford Neal's avatar
Radford Neal committed
693

694
#define HASHSIZE_HERE 1103
Radford Neal's avatar
Radford Neal committed
695

696
#define PTRHASH(obj) ((unsigned)((R_size_t)obj ^ ((R_size_t)obj>>16)) >> 2)
Radford Neal's avatar
Radford Neal committed
697

698 699 700 701 702
#define HASH_TABLE_COUNT(ht) TRUELENGTH(ht)
#define SET_HASH_TABLE_COUNT(ht,val) SET_TRUELENGTH(ht,val)

#define HASH_BUCKET(ht,pos) VECTOR_ELT(ht, pos)
#define SET_HASH_BUCKET(ht,pos,val) SET_VECTOR_ELT(ht,pos,val)
Radford Neal's avatar
Radford Neal committed
703 704 705

static SEXP MakeHashTable(void)
{
706 707 708
    SEXP ht = allocVector (VECSXP, HASHSIZE_HERE);
    SET_HASH_TABLE_COUNT (ht, 0);
    return ht;
Radford Neal's avatar
Radford Neal committed
709 710
}

711
static attribute_noinline void HashAdd (SEXP obj, SEXP ht)
Radford Neal's avatar
Radford Neal committed
712
{
713
    int pos = PTRHASH(obj) % HASHSIZE_HERE;
Radford Neal's avatar
Radford Neal committed
714 715
    int count = HASH_TABLE_COUNT(ht) + 1;

716 717 718 719 720 721 722
    SEXP cell = allocVector(VECSXP,2);
    SET_TRUELENGTH(cell,count);
    SET_VECTOR_ELT(cell,0,obj);
    SET_VECTOR_ELT(cell,1,HASH_BUCKET(ht,pos));

    SET_HASH_BUCKET (ht, pos, cell);
    SET_HASH_TABLE_COUNT (ht, count);
Radford Neal's avatar
Radford Neal committed
723 724
}

725
static attribute_noinline int HashGet (SEXP obj, SEXP ht)
Radford Neal's avatar
Radford Neal committed
726
{
727
    int pos = PTRHASH(obj) % HASHSIZE_HERE;
Radford Neal's avatar
Radford Neal committed
728
    SEXP cell;
729 730 731 732 733 734
    for (cell = HASH_BUCKET(ht,pos); 
         cell != R_NilValue; 
         cell = VECTOR_ELT(cell,1)) {
	if (obj == VECTOR_ELT(cell,0))
	    return TRUELENGTH(cell);
    }
Radford Neal's avatar
Radford Neal committed
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
    return 0;
}

/*
 * Administrative SXP values
 *
 * These macros defind SXP "type" for specifying special object, such
 * as R_NilValue, or control information, like REFSXP or NAMESPACESXP.
 * The range of SXP types is limited to 5 bit by the current sxpinfo
 * layout, but just in case these values are placed at the top of the
 * 8 bit range.
 */

#define REFSXP            255
#define NILVALUE_SXP      254
#define GLOBALENV_SXP     253
#define UNBOUNDVALUE_SXP  252
#define MISSINGARG_SXP    251
#define BASENAMESPACE_SXP 250
#define NAMESPACESXP      249
#define PACKAGESXP        248
#define PERSISTSXP        247
757

Radford Neal's avatar
Radford Neal committed
758 759 760 761 762 763 764 765
/* the following are speculative--we may or may not need them soon */
#define CLASSREFSXP       246
#define GENERICREFSXP     245
#define BCREPDEF          244
#define BCREPREF          243
#define EMPTYENV_SXP	  242
#define BASEENV_SXP	  241

Radford Neal's avatar
Radford Neal committed
766 767 768 769 770 771 772 773 774 775 776 777 778
/* The following are needed to preserve attribute information on
   expressions in the constant pool of byte code objects. This is
   mainly for preserving source references attributes.  The original
   implementation of the sharing-preserving writing and reading of yte
   code objects did not account for the need to preserve attributes,
   so there is now a work-around using these SXP types to flag when
   the ATTRIB field has been written out. Object bits and S4 bits are
   still not preserved.  It the long run in might be better to change
   to a scheme in which all sharing is preserved and byte code objects
   don't need to be handled as a special case.  LT */
#define ATTRLANGSXP       240
#define ATTRLISTSXP       239

779 780 781
/* the following added for pqR */
#define MISSINGUNDER_SXP 229

Radford Neal's avatar
Radford Neal committed
782 783 784 785 786 787 788 789 790 791 792 793 794
/*
 * Type/Flag Packing and Unpacking
 *
 * To reduce space consumption for serializing code (lots of list
 * structure) the type (at most 8 bits), several single bit flags,
 * and the sxpinfo gp field (LEVELS, 16 bits) are packed into a single
 * integer.  The integer is signed, so this shouldn't be pushed too
 * far.  It assumes at least 28 bits, but that should be no problem.
 */

#define IS_OBJECT_BIT_MASK (1 << 8)
#define HAS_ATTR_BIT_MASK (1 << 9)
#define HAS_TAG_BIT_MASK (1 << 10)
795
#define IS_CONSTANT_MASK (1 << 11)
Radford Neal's avatar
Radford Neal committed
796 797 798 799
#define ENCODE_LEVELS(v) ((v) << 12)
#define DECODE_LEVELS(v) ((v) >> 12)
#define DECODE_TYPE(v) ((v) & 255)

800 801
static R_INLINE int PackFlags(int type, int levs, int isobj, int hasattr, 
                              int hastag, int isconstant)
Radford Neal's avatar
Radford Neal committed
802 803 804 805 806 807
{
    /* We don't write out bit 5 as from R 2.8.0.
       It is used to indicate if an object is in CHARSXP cache
       - not that it matters to this version of R, but it saves
       checking all previous versions.

808
       Also make sure the former HASHASH bit (1) is not written out.
Radford Neal's avatar
Radford Neal committed
809 810
    */
    int val;
811
    if (type == CHARSXP) levs &= (~(CACHED_MASK | 1 /* was HASHASH */));
Radford Neal's avatar
Radford Neal committed
812 813 814 815
    val = type | ENCODE_LEVELS(levs);
    if (isobj) val |= IS_OBJECT_BIT_MASK;
    if (hasattr) val |= HAS_ATTR_BIT_MASK;
    if (hastag) val |= HAS_TAG_BIT_MASK;
816
    if (isconstant) val |= IS_CONSTANT_MASK;
Radford Neal's avatar
Radford Neal committed
817 818 819
    return val;
}

820 821 822
static R_INLINE void UnpackFlags(int flags, SEXPTYPE *ptype, int *plevs,
                                 int *pisobj, int *phasattr, int *phastag,
                                 int *pisconstant)
Radford Neal's avatar
Radford Neal committed
823
{
824 825 826 827 828 829 830
    if ((*ptype = DECODE_TYPE(flags)) != REFSXP) {
        *plevs = DECODE_LEVELS(flags);
        *pisobj = (flags & IS_OBJECT_BIT_MASK) != 0;
        *phasattr = (flags & HAS_ATTR_BIT_MASK) != 0;
        *phastag = (flags & HAS_TAG_BIT_MASK) != 0;
        *pisconstant = (flags & IS_CONSTANT_MASK) != 0;
    }
Radford Neal's avatar
Radford Neal committed
831 832 833
}


834 835 836 837 838 839 840
/* REFERENCE/INDEX PACKING AND UNPACKING.

   Code will contain many references to symbols. As long as there are
   not too many references, the index and the REFSXP flag indicating a
   reference can be packed in a single integeger.  Since the index is
   1-based, a 0 is used to indicate an index that doesn't fit and
   therefore follows. */
Radford Neal's avatar
Radford Neal committed
841 842 843 844 845

#define PACK_REF_INDEX(i) (((i) << 8) | REFSXP)
#define UNPACK_REF_INDEX(i) ((i) >> 8)
#define MAX_PACKED_INDEX (INT_MAX >> 8)

Radford Neal's avatar
Radford Neal committed
846
static R_INLINE void OutRefIndex (struct outpar *par, int i)
Radford Neal's avatar
Radford Neal committed
847 848
{
    if (i > MAX_PACKED_INDEX) {
Radford Neal's avatar
Radford Neal committed
849 850
	OutInteger(par, REFSXP);
	OutInteger(par, i);
Radford Neal's avatar
Radford Neal committed
851
    }
Radford Neal's avatar
Radford Neal committed
852
    else OutInteger(par, PACK_REF_INDEX(i));
Radford Neal's avatar
Radford Neal committed
853 854
}

Radford Neal's avatar
Radford Neal committed
855
static R_INLINE int InRefIndex (struct inpar *par, int flags)
Radford Neal's avatar
Radford Neal committed
856 857 858
{
    int i = UNPACK_REF_INDEX(flags);
    if (i == 0)
Radford Neal's avatar
Radford Neal committed
859
	return InInteger(par);
Radford Neal's avatar
Radford Neal committed
860 861 862 863 864
    else
	return i;
}


865 866 867 868
/* PERSISTENT NAME HOOKS.

   These routines call the appropriate hook functions for allowing
   customized handling of reference objects. */
Radford Neal's avatar
Radford Neal committed
869

870
static inline SEXP GetPersistentName(R_outpstream_t stream, SEXP s)
Radford Neal's avatar
Radford Neal committed
871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
{
    if (stream->OutPersistHookFunc != NULL) {
	switch (TYPEOF(s)) {
	case WEAKREFSXP:
	case EXTPTRSXP: break;
	case ENVSXP:
	    if (s == R_GlobalEnv ||
		s == R_BaseEnv ||
		s == R_EmptyEnv ||
		R_IsNamespaceEnv(s) ||
		R_IsPackageEnv(s))
		return R_NilValue;
	    else
		break;
	default: return R_NilValue;
	}
	return stream->OutPersistHookFunc(s, stream->OutPersistHookData);
    }
    else
	return R_NilValue;
}

893
static inline SEXP PersistentRestore(R_inpstream_t stream, SEXP s)
Radford Neal's avatar
Radford Neal committed
894 895 896 897 898 899 900
{
    if (stream->InPersistHookFunc == NULL)
	error(_("no restore method available"));
    return stream->InPersistHookFunc(s, stream->InPersistHookData);
}


901
/* SERIALIZATION CODE. */
Radford Neal's avatar
Radford Neal committed
902

903
static inline int SaveSpecialHook(SEXP item)
Radford Neal's avatar
Radford Neal committed
904 905 906 907 908 909 910
{
    if (item == R_NilValue)      return NILVALUE_SXP;
    if (item == R_EmptyEnv)	 return EMPTYENV_SXP;
    if (item == R_BaseEnv)	 return BASEENV_SXP;
    if (item == R_GlobalEnv)     return GLOBALENV_SXP;
    if (item == R_UnboundValue)  return UNBOUNDVALUE_SXP;
    if (item == R_MissingArg)    return MISSINGARG_SXP;
911
    if (item == R_MissingUnder)  return MISSINGUNDER_SXP;
Radford Neal's avatar
Radford Neal committed
912 913 914 915
    if (item == R_BaseNamespace) return BASENAMESPACE_SXP;
    return 0;
}

916
static attribute_noinline void OutStringVec (struct outpar *par, SEXP s)
Radford Neal's avatar
Radford Neal committed
917
{
918 919 920
    R_outpstream_t stream = par->stream;
    SEXP ref_table = par->ref_table;

Radford Neal's avatar
Radford Neal committed
921 922 923 924 925
    int i, len;

    R_assert(TYPEOF(s) == STRSXP);

#ifdef WARN_ABOUT_NAMES_IN_PERSISTENT_STRINGS
Radford Neal's avatar
Radford Neal committed
926
    SEXP names = getAttrib(s, R_NamesSymbol);
Radford Neal's avatar
Radford Neal committed
927 928 929 930 931
    if (names != R_NilValue)
	warning(_("names in persistent strings are currently ignored"));
#endif

    len = LENGTH(s);
Radford Neal's avatar
Radford Neal committed
932 933
    OutInteger(par, 0); /* place holder to allow names if we want to */
    OutInteger(par, len);
Radford Neal's avatar
Radford Neal committed
934
    for (i = 0; i < len; i++)
935
	WriteItem (par, STRING_ELT(s, i));
Radford Neal's avatar
Radford Neal committed
936 937
}

Radford Neal's avatar
Radford Neal committed
938 939 940
#define min2(a, b) ((a) < (b)) ? (a) : (b)

/* length will need to be another type to allow longer vectors */
Radford Neal's avatar
Radford Neal committed
941
static attribute_noinline void OutIntegerVec (struct outpar *par, SEXP s)
Radford Neal's avatar
Radford Neal committed
942
{
Radford Neal's avatar
Radford Neal committed
943 944 945
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

946
    R_len_t length = LENGTH(s);
Radford Neal's avatar
Radford Neal committed
947
    OutInteger(par, length);
948

Radford Neal's avatar
Radford Neal committed
949 950 951
    switch (stream->type) {
    case R_pstream_xdr_format:
    {
952
	int done, this;
Radford Neal's avatar
Radford Neal committed
953 954
	for (done = 0; done < length; done += this) {
	    this = min2(CHUNK_SIZE, length - done);
955 956
	    for (int cnt = 0; cnt < this; cnt++)
                encode_integer (INTEGER(s)[done+cnt], buf + sizeof(int) * cnt);
Radford Neal's avatar
Radford Neal committed
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
	    stream->OutBytes(stream, buf, sizeof(int) * this);
	}
	break;
    }
    case R_pstream_binary_format:
    {
	/* write in chunks to avoid overflowing ints */
	int done, this;
	for (done = 0; done < length; done += this) {
	    this = min2(CHUNK_SIZE, length - done);
	    stream->OutBytes(stream, INTEGER(s) + done, sizeof(int) * this);
	}
	break;
    }
    default:
	for (int cnt = 0; cnt < length; cnt++)
Radford Neal's avatar
Radford Neal committed
973
	    OutInteger(par, INTEGER(s)[cnt]);
Radford Neal's avatar
Radford Neal committed
974 975 976
    }
}

Radford Neal's avatar
Radford Neal committed
977
static attribute_noinline void OutRealVec (struct outpar *par, SEXP s) 
Radford Neal's avatar
Radford Neal committed
978
{
Radford Neal's avatar
Radford Neal committed
979 980 981
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

982
    R_len_t length = LENGTH(s);
Radford Neal's avatar
Radford Neal committed
983
    OutInteger(par, length);
984

Radford Neal's avatar
Radford Neal committed
985 986 987 988 989 990
    switch (stream->type) {
    case R_pstream_xdr_format:
    {
	int done, this;
        for (done = 0; done < length; done += this) {
	    this = min2(CHUNK_SIZE, length - done);
991
            encode_doubles (REAL(s)+done, this, buf);
Radford Neal's avatar
Radford Neal committed
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
	    stream->OutBytes(stream, buf, sizeof(double) * this);
	}
	break;
    }
    case R_pstream_binary_format:
    {
	int done, this;
        for (done = 0; done < length; done += this) {
	    this = min2(CHUNK_SIZE, length - done);
	    stream->OutBytes(stream, REAL(s) + done, sizeof(double) * this);
	}
	break;
    }
    default:
	for (int cnt = 0; cnt < length; cnt++)
Radford Neal's avatar
Radford Neal committed
1007
	    OutReal(par, REAL(s)[cnt]);
Radford Neal's avatar
Radford Neal committed
1008 1009
    }
}
Radford Neal's avatar
Radford Neal committed
1010

Radford Neal's avatar
Radford Neal committed
1011
static attribute_noinline void OutComplexVec (struct outpar *par, SEXP s)
Radford Neal's avatar
Radford Neal committed
1012
{
Radford Neal's avatar
Radford Neal committed
1013 1014 1015
    R_outpstream_t stream = par->stream;
    char *buf = par->buf;

1016
    R_len_t length = LENGTH(s);
Radford Neal's avatar
Radford Neal committed
1017
    OutInteger(par, length);
1018

Radford Neal's avatar
Radford Neal committed
1019 1020 1021 1022 1023 1024 1025
    switch (stream->type) {
    case R_pstream_xdr_format:
    {
	int done, this;
	Rcomplex *c = COMPLEX(s);
        for (done = 0; done < length; done += this) {
	    this = min2(CHUNK_SIZE, length - done);
1026
            encode_doubles ((double*)(COMPLEX(s)+done), 2*this, buf);
Radford Neal's avatar
Radford Neal committed
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036
	    stream->OutBytes(stream, buf, sizeof(Rcomplex) * this);
	}
	break;
    }
    case R_pstream_binary_format:
    {
	int done, this;
        for (done = 0; done < length; done += this) {
	    this = min2(CHUNK_SIZE, length - done);
	    stream->OutBytes(stream, COMPLEX(s) + done, 
1037
			     sizeof(Rcomplex) * this);
Radford Neal's avatar
Radford Neal committed
1038 1039 1040 1041 1042
	}
	break;
    }
    default:
	for (int cnt = 0; cnt < length; cnt++)
Radford Neal's avatar
Radford Neal committed
1043
	    OutComplex(par, COMPLEX(s)[cnt]);
Radford Neal's avatar
Radford Neal committed
1044 1045
    }
}
Radford Neal's avatar
Radford Neal committed
1046

1047
static void WriteItem (struct outpar *par, SEXP s)
Radford Neal's avatar
Radford Neal committed
1048
{
1049 1050 1051 1052
    R_outpstream_t stream = par->stream;
    SEXP ref_table = par->ref_table;
    int nosharing = par->nosharing;

Radford Neal's avatar
Radford Neal committed
1053
    int i;
Radford Neal's avatar
Radford Neal committed
1054
    int ix; /* this could be a different type for longer vectors */
Radford Neal's avatar
Radford Neal committed
1055 1056
    SEXP t;

Radford Neal's avatar
Radford Neal committed
1057 1058 1059 1060
    if (R_compile_pkgs && TYPEOF(s) == CLOSXP && TYPEOF(BODY(s)) != BCODESXP) {
	SEXP new_s;
	R_compile_pkgs = FALSE;
	PROTECT(new_s = R_cmpfun(s));
1061
	WriteItem (par, new_s);
Radford Neal's avatar
Radford Neal committed
1062 1063 1064 1065 1066
	UNPROTECT(1);
	R_compile_pkgs = TRUE;
	return;
    }

1067 1068 1069 1070 1071
 tailcall: ;

    int cannot_be_special = ((VECTOR_TYPES | CONS_TYPES) >> TYPEOF(s)) & 1;

    if (!cannot_be_special && (i = SaveSpecialHook(s)) != 0) {
Radford Neal's avatar
Radford Neal committed
1072
	OutInteger(par, i);
1073 1074 1075 1076
        return;
    }

    if (!cannot_be_special && (t = GetPersistentName(stream,s)) != R_NilValue) {
Radford Neal's avatar
Radford Neal committed
1077 1078 1079
	R_assert(TYPEOF(t) == STRSXP && LENGTH(t) > 0);
	PROTECT(t);
	HashAdd(s, ref_table);
Radford Neal's avatar
Radford Neal committed
1080
	OutInteger(par, PERSISTSXP);
1081
	OutStringVec (par, t);
Radford Neal's avatar
Radford Neal committed
1082
	UNPROTECT(1);
1083
        return;
Radford Neal's avatar
Radford Neal committed
1084
    }
1085

1086
    if ((i = HashGet(s, ref_table)) != 0) {
Radford Neal's avatar
Radford Neal committed
1087
	OutRefIndex(par, i);
1088 1089 1090
        return;
    }

1091 1092
    R_CHECKSTACK();

1093
    if (TYPEOF(s) == SYMSXP) {
Radford Neal's avatar
Radford Neal committed
1094 1095
	/* Note : NILSXP can't occur here */
	HashAdd(s, ref_table);
Radford Neal's avatar
Radford Neal committed
1096
	OutInteger(par, SYMSXP);
1097
	WriteItem (par, PRINTNAME(s));
1098
        return;
Radford Neal's avatar
Radford Neal committed
1099
    }
1100 1101

    if (TYPEOF(s) == ENVSXP) {
Radford Neal's avatar
Radford Neal committed
1102 1103 1104 1105 1106
	HashAdd(s, ref_table);
	if (R_IsPackageEnv(s)) {
	    SEXP name = R_PackageEnvName(s);
	    warning(_("'%s' may not be available when loading"),
		    CHAR(STRING_ELT(name, 0)));
Radford Neal's avatar
Radford Neal committed
1107
	    OutInteger(par, PACKAGESXP);
1108
	    OutStringVec (par, name);
Radford Neal's avatar
Radford Neal committed
1109 1110 1111 1112 1113
	}
	else if (R_IsNamespaceEnv(s)) {
#ifdef WARN_ABOUT_NAME_SPACES_MAYBE_NOT_AVAILABLE
	    warning(_("namespaces may not be available when loading"));
#endif
Radford Neal's avatar
Radford Neal committed
1114
	    OutInteger(par, NAMESPACESXP);
1115
	    OutStringVec (par, PROTECT(R_NamespaceEnvSpec(s)));
1116
	    UNPROTECT(1);
Radford Neal's avatar
Radford Neal committed
1117 1118
	}
	else {
Radford Neal's avatar
Radford Neal committed
1119 1120
	    OutInteger(par, ENVSXP);
	    OutInteger(par, R_EnvironmentIsLocked(s) ? 1 : 0);
1121 1122
	    WriteItem (par, ENCLOS(s));
	    WriteItem (par, FRAME(s));
Radford Neal's avatar
Radford Neal committed
1123 1124
            SEXP newtable = HASHTAB(s) == R_NilValue ? R_NilValue
                             : R_HashRehashOld(HASHTAB(s));
1125
            PROTECT(newtable);
1126
	    WriteItem (par, newtable);
1127
            UNPROTECT(1);
1128
	    WriteItem (par, ATTRIB(s));
Radford Neal's avatar
Radford Neal committed
1129
	}
1130
        return;
Radford Neal's avatar
Radford Neal committed
1131
    }
1132

1133
    int flags, hastag, hasattr;
1134

1135
    hastag = FALSE;
1136

1137 1138 1139 1140 1141
    if((((1<<LISTSXP) + (1<<LANGSXP) + (1<<CLOSXP) + (1<<PROMSXP) + (1<<DOTSXP))
          >> TYPEOF(s)) & 1) {
        if (TAG(s) != R_NilValue)
            hastag = TRUE;
    }
1142

1143 1144 1145 1146
    /* The CHARSXP cache chains are maintained through the ATTRIB
       field, so the content of that field must not be serialized, so
       we treat it as not there. */
    hasattr = ATTRIB(s) != R_NilValue && TYPEOF(s) != CHARSXP;
1147

1148 1149
    flags = PackFlags(TYPEOF(s), LEVELS(s), OBJECT(s),
                      hasattr, hastag, nosharing ? 0 : IS_CONSTANT(s));
1150

Radford Neal's avatar
Radford Neal committed
1151
    OutInteger(par, flags);
1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162

    switch (TYPEOF(s)) {
    case LISTSXP:
    case LANGSXP:
    case CLOSXP:
    case PROMSXP:
    case DOTSXP:
        /* Dotted pair objects */
        /* These write their ATTRIB fields first to allow us to avoid
           recursion on the CDR */
        if (hasattr)
1163
            WriteItem (par, ATTRIB(s));
1164
        if (TAG(s) != R_NilValue)
1165 1166
            WriteItem (par, TAG(s));
        WriteItem (par, CAR(s));
1167 1168 1169 1170 1171 1172
        /* now do a tail call to WriteItem to handle the CDR */
        s = CDR(s);
        goto tailcall;
    case EXTPTRSXP:
        /* external pointers */
        HashAdd(s, ref_table);
1173 1174
        WriteItem (par, EXTPTR_PROT(s));
        WriteItem (par, EXTPTR_TAG(s));
1175 1176 1177 1178 1179 1180 1181 1182
        break;
    case WEAKREFSXP:
        /* Weak references */
        HashAdd(s, ref_table);
        break;
    case SPECIALSXP:
    case BUILTINSXP:
        /* Builtin functions */
Radford Neal's avatar
Radford Neal committed
1183
        OutString(par, PRIMNAME(s), strlen(PRIMNAME(s)));
1184 1185 1186
        break;
    case CHARSXP:
        if (s == NA_STRING)
Radford Neal's avatar
Radford Neal committed
1187
            OutInteger(par, -1);
1188
        else
Radford Neal's avatar
Radford Neal committed
1189
            OutString(par, CHAR(s), LENGTH(s));
1190 1191 1192
        break;
    case LGLSXP:
    case INTSXP:
Radford Neal's avatar
Radford Neal committed
1193
        OutIntegerVec(par, s);
1194 1195
        break;
    case REALSXP:
Radford Neal's avatar
Radford Neal committed
1196
        OutRealVec(par, s);
1197 1198
        break;
    case CPLXSXP:
Radford Neal's avatar
Radford Neal committed
1199
        OutComplexVec(par, s);
1200 1201
        break;
    case STRSXP:
Radford Neal's avatar
Radford Neal committed
1202
        OutInteger(par, LENGTH(s));
1203
        for (ix = 0; ix < LENGTH(s); ix++)
1204
            WriteItem (par, STRING_ELT(s, ix));
1205 1206 1207
        break;
    case VECSXP:
    case EXPRSXP:
Radford Neal's avatar
Radford Neal committed
1208
        OutInteger(par, LENGTH(s));
1209
        for (ix = 0; ix < LENGTH(s); ix++)
1210
            WriteItem (par, VECTOR_ELT(s, ix));
1211 1212
        break;
    case BCODESXP:
1213
        WriteBC (par, s);
1214 1215
        break;
    case RAWSXP:
Radford Neal's avatar
Radford Neal committed
1216
        OutInteger(par, LENGTH(s));
1217 1218
        switch (stream->type) {
        case R_pstream_xdr_format:
Radford Neal's avatar
Radford Neal committed
1219 1220
        case R_pstream_binary_format: {
            int done, this, len = LENGTH(s);
1221 1222 1223 1224 1225 1226 1227 1228
            for (done = 0; done < len; done += this) {
                this = min2(CHUNK_SIZE, len - done);
                stream->OutBytes(stream, RAW(s) + done, this);
            }
            break;
        }
        default:
            for (ix = 0; ix < LENGTH(s); ix++) 
Radford Neal's avatar
Radford Neal committed
1229
                OutByte(par, RAW(s)[ix]);
1230 1231 1232 1233 1234 1235
        }
        break;
    case S4SXP:
      break; /* only attributes (i.e., slots) count */
    default:
        error(_("WriteItem: unknown type %i"), TYPEOF(s));
Radford Neal's avatar
Radford Neal committed
1236
    }
1237 1238

    if (hasattr)
1239
        WriteItem (par, ATTRIB(s));
Radford Neal's avatar
Radford Neal committed
1240 1241 1242 1243
}

static SEXP MakeCircleHashTable(void)
{
1244
    return CONS(R_NilValue, allocVector(VECSXP, HASHSIZE_HERE));
Radford Neal's avatar
Radford Neal committed
1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311
}

static Rboolean AddCircleHash(SEXP item, SEXP ct)
{
    SEXP table, bucket, list;
    int pos;

    table = CDR(ct);
    pos = PTRHASH(item) % LENGTH(table);
    bucket = VECTOR_ELT(table, pos);
    for (list = bucket; list != R_NilValue; list = CDR(list))
	if (TAG(list) == item) {
	    if (CAR(list) == R_NilValue) {
		/* this is the second time; enter in list and mark */
		SETCAR(list, R_UnboundValue); /* anything different will do */
		SETCAR(ct, CONS(item, CAR(ct)));
	    }
	    return TRUE;
	}

    /* If we get here then this is a new item; enter in the table */
    bucket = CONS(R_NilValue, bucket);
    SET_TAG(bucket, item);
    SET_VECTOR_ELT(table, pos, bucket);
    return FALSE;
}

static void ScanForCircles1(SEXP s, SEXP ct)
{
    switch (TYPEOF(s)) {
    case LANGSXP:
    case LISTSXP:
	if (! AddCircleHash(s, ct)) {
	    ScanForCircles1(CAR(s), ct);
	    ScanForCircles1(CDR(s), ct);
	}
	break;
    case BCODESXP:
	{
	    int i, n;
	    SEXP consts = BCODE_CONSTS(s);
	    n = LENGTH(consts);
	    for (i = 0; i < n; i++)
		ScanForCircles1(VECTOR_ELT(consts, i), ct);
	}
	break;
    default: break;
    }
}

static SEXP ScanForCircles(SEXP s)
{
    SEXP ct;
    PROTECT(ct = MakeCircleHashTable());
    ScanForCircles1(s, ct);
    UNPROTECT(1);
    return CAR(ct);
}

static SEXP findrep(SEXP x, SEXP reps)
{
    for (; reps != R_NilValue; reps = CDR(reps))
	if (x == CAR(reps))
	    return reps;
    return R_NilValue;
}

1312
static void WriteBCLang (struct outpar *par, SEXP s, SEXP reps)
Radford Neal's avatar
Radford Neal committed
1313
{
1314 1315
    R_outpstream_t stream = par->stream;

Radford Neal's avatar
Radford Neal committed
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
    int type = TYPEOF(s);
    if (type == LANGSXP || type == LISTSXP) {
	SEXP r = findrep(s, reps);
	int output = TRUE;
	if (r != R_NilValue) {
	    /* we have a cell referenced more than once */
	    if (TAG(r) == R_NilValue) {
		/* this is the first reference, so update and register
		   the counter */
		int i = INTEGER(CAR(reps))[0]++;
1326
		SET_TAG(r, allocVector1INT());
Radford Neal's avatar
Radford Neal committed
1327
		INTEGER(TAG(r))[0] = i;
Radford Neal's avatar
Radford Neal committed
1328 1329
		OutInteger(par, BCREPDEF);
		OutInteger(par, i);
Radford Neal's avatar
Radford Neal committed
1330 1331 1332
	    }
	    else {
		/* we've seen it before, so just put out the index */
Radford Neal's avatar
Radford Neal committed
1333 1334
		OutInteger(par, BCREPREF);
		OutInteger(par, INTEGER(TAG(r))[0]);
Radford Neal's avatar
Radford Neal committed
1335 1336 1337 1338
		output = FALSE;
	    }
	}
	if (output) {
Radford Neal's avatar
Radford Neal committed
1339 1340 1341 1342 1343 1344 1345
	    SEXP attr = ATTRIB(s);
	    if (attr != R_NilValue) {
		switch(type) {
		case LANGSXP: type = ATTRLANGSXP; break;
		case LISTSXP: type = ATTRLISTSXP; break;
		}
	    }
Radford Neal's avatar
Radford Neal committed
1346
	    OutInteger(par, type);
Radford Neal's avatar
Radford Neal committed
1347
	    if (attr != R_NilValue)
1348 1349 1350 1351
		WriteItem (par, attr);
	    WriteItem (par, TAG(s));
	    WriteBCLang (par, CAR(s), reps);
	    WriteBCLang (par, CDR(s), reps);
Radford Neal's avatar
Radford Neal committed
1352 1353 1354
	}
    }
    else {
Radford Neal's avatar
Radford Neal committed
1355
	OutInteger(par, 0); /* pad */
1356
	WriteItem (par, s);
Radford Neal's avatar
Radford Neal committed
1357 1358 1359
    }
}

1360
static void WriteBC1 (struct outpar *par, SEXP s, SEXP reps)
Radford Neal's avatar
Radford Neal committed
1361
{
1362 1363
    R_outpstream_t stream = par->stream;

Radford Neal's avatar
Radford Neal committed
1364 1365 1366
    int i, n;
    SEXP code, consts;
    PROTECT(code = R_bcDecode(BCODE_CODE(s)));
1367
    WriteItem (par, code);
Radford Neal's avatar
Radford Neal committed
1368 1369
    consts = BCODE_CONSTS(s);
    n = LENGTH(consts);
Radford Neal's avatar
Radford Neal committed
1370
    OutInteger(par, n);
Radford Neal's avatar
Radford Neal committed
1371 1372 1373 1374 1375
    for (i = 0; i < n; i++) {
	SEXP c = VECTOR_ELT(consts, i);
	int type = TYPEOF(c);
	switch (type) {
	case BCODESXP:
Radford Neal's avatar
Radford Neal committed
1376
	    OutInteger(par, type);
1377
	    WriteBC1 (par, c, reps);
Radford Neal's avatar
Radford Neal committed
1378 1379 1380
	    break;
	case LANGSXP:
	case LISTSXP:
1381
	    WriteBCLang (par, c, reps);
Radford Neal's avatar
Radford Neal committed
1382 1383
	    break;
	default:
Radford Neal's avatar
Radford Neal committed
1384
	    OutInteger(par, type);
1385
	    WriteItem (par, c);
Radford Neal's avatar
Radford Neal committed
1386 1387 1388 1389 1390
	}
    }
    UNPROTECT(1);
}

1391
static void WriteBC (struct outpar *par, SEXP s)
Radford Neal's avatar
Radford Neal committed
1392
{
1393 1394
    R_outpstream_t stream = par->stream;

Radford Neal's avatar
Radford Neal committed
1395 1396
    SEXP reps = ScanForCircles(s);
    PROTECT(reps = CONS(R_NilValue, reps));
Radford Neal's avatar
Radford Neal committed
1397
    OutInteger(par, length(reps));
1398
    SETCAR(reps, allocVector1INT());
Radford Neal's avatar
Radford Neal committed
1399
    INTEGER(CAR(reps))[0] = 0;
1400
    WriteBC1 (par, s, reps);
Radford Neal's avatar
Radford Neal committed
1401 1402 1403
    UNPROTECT(1);
}

1404 1405 1406 1407
/* R_Serialize is accessible from outside.  R_Serialize_internal has the
   additional nosharing argument for use in this module. */

static void R_Serialize_internal (SEXP s, R_outpstream_t stream, int nosharing)
Radford Neal's avatar
Radford Neal committed
1408
{
Radford Neal's avatar
Radford Neal committed
1409 1410 1411 1412 1413 1414 1415
    struct outpar par;
    char buf [CBUF_SIZE];
    par.nosharing = nosharing;
    par.stream = stream;
    par.buf = buf;
    PROTECT(par.ref_table = MakeHashTable());

Radford Neal's avatar
Radford Neal committed
1416 1417 1418 1419 1420 1421
    int version = stream->version;

    OutFormat(stream);

    switch(version) {
    case 2:
Radford Neal's avatar
Radford Neal committed
1422 1423 1424
	OutInteger(&par, version);
	OutInteger(&par, R_VERSION);
	OutInteger(&par, R_Version(2,3,0));
Radford Neal's avatar
Radford Neal committed
1425 1426 1427 1428
	break;
    default: error(_("version %d not supported"), version);
    }

1429 1430
    WriteItem (&par, s);

Radford Neal's avatar
Radford Neal committed
1431 1432 1433
    UNPROTECT(1);
}

1434 1435 1436 1437 1438
void R_Serialize (SEXP s, R_outpstream_t stream)
{
    R_Serialize_internal (s, stream, FALSE);
}

Radford Neal's avatar
Radford Neal committed
1439

1440
/*** UNSERIALIZE CODE ***/
Radford Neal's avatar
Radford Neal committed
1441

1442
#define INITIAL_REFREAD_TABLE_SIZE 250
Radford Neal's avatar
Radford Neal committed
1443 1444 1445 1446 1447 1448 1449 1450

static SEXP MakeReadRefTable(void)
{
    SEXP data = allocVector(VECSXP, INITIAL_REFREAD_TABLE_SIZE);
    SET_TRUELENGTH(data, 0);
    return CONS(data, R_NilValue);
}

1451
static R_INLINE SEXP GetReadRef(SEXP table, int index)
Radford Neal's avatar
Radford Neal committed
1452 1453 1454 1455 1456 1457 1458 1459 1460
{
    int i = index - 1;
    SEXP data = CAR(table);

    if (i < 0 || i >= LENGTH(data))
	error(_("reference index out of range"));
    return VECTOR_ELT(data, i);
}

1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477
static SEXP ExpandRefTable(SEXP table, SEXP value)
{
    SEXP data = CAR(table);
    int len = LENGTH(data);
    SEXP newdata;
    int i;

    PROTECT(value);
    newdata = allocVector(VECSXP, 2*len);
    for (i = 0; i < len; i++)
        SET_VECTOR_ELT(newdata, i, VECTOR_ELT(data, i));
    SETCAR(table, newdata);
    UNPROTECT(1);

    return newdata;
}

1478
static R_INLINE void AddReadRef(SEXP table, SEXP value)
Radford Neal's avatar
Radford Neal committed
1479 1480 1481
{
    SEXP data = CAR(table);
    int count = TRUELENGTH(data) + 1;
1482 1483
    if (count >= LENGTH(data)) 
        data = ExpandRefTable(table,value);
Radford Neal's avatar
Radford Neal committed
1484 1485 1486 1487
    SET_TRUELENGTH(data, count);
    SET_VECTOR_ELT(data, count - 1, value);
}

1488
static attribute_noinline SEXP InStringVec (struct inpar *par)
Radford Neal's avatar
Radford Neal committed
1489
{
1490 1491 1492
    R_inpstream_t stream = par->stream;
    SEXP ref_table = par->ref_table;

Radford Neal's avatar
Radford Neal committed
1493 1494
    SEXP s;
    int i, len;
Radford Neal's avatar
Radford Neal committed
1495
    if (InInteger(par) != 0)
Radford Neal's avatar
Radford Neal committed
1496
	error(_("names in persistent strings are not supported yet"));
Radford Neal's avatar
Radford Neal committed
1497
    len = InInteger(par);
Radford Neal's avatar
Radford Neal committed
1498 1499
    PROTECT(s = allocVector(STRSXP, len));
    for (i = 0; i < len; i++)
1500
	SET_STRING_ELT(s, i, ReadItem(par));
Radford Neal's avatar
Radford Neal committed
1501 1502 1503 1504
    UNPROTECT(1);
    return s;
}

Radford Neal's avatar
Radford Neal committed
1505
static attribute_noinline SEXP InCharSXP (struct inpar *par, int levs)
1506
{
Radford Neal's avatar
Radford Neal committed
1507 1508 1509 1510
    R_inpstream_t stream = par->stream;
    char *buf = par->buf;

    int length = InInteger(par); /* suppose still limited to 2^31-1 bytes */
1511 1512 1513 1514 1515