Commit 853f5107 by Bertrand Marc

Initial code to write sas7bdat files.

parent ae97f3c9
......@@ -62,6 +62,7 @@ int read_subheader(struct sas7bdat_handler_t *dataset, struct subheader_pointer_
assert( subheader->row_size.page_size == dataset->header.page_size );
printf("%d, %d\n", subheader->row_size.end_of_header[0], subheader->row_size.end_of_header[1]);
assert( subheader->row_size.end_of_header[0] == (SAS7BDAT_LONG) -1 && subheader->row_size.end_of_header[1] == (SAS7BDAT_LONG) -1 );
printf("ROWSIZE mandatory short : %d\n", subheader->row_size.unknown5);
break;
case(SUBH_COLSIZE):
#ifdef VERBOSE
......
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <errno.h>
#include <unistd.h>
#include <assert.h>
#include "../header.h"
#include "../page.h"
#include "../sas7bdat_struct.h"
#include "../sas7bdat.h"
#define PAGE_SIZE 4096
/*
 * Sanity-check the in-memory layout of the structures that are written
 * verbatim to disk. Each assert aborts the program if the compiler laid a
 * structure out with a different size or field offset than the value
 * hard-coded here. The checks are runtime asserts, so they are compiled out
 * when NDEBUG is defined.
 */
void check_sizes() {
/* Total sizes of the header and subheader structures, in bytes. */
assert(sizeof(struct header64_t) == 1024);
assert(sizeof(struct row_size_subheader_t) == 480);
assert(sizeof(struct column_size_subheader_t) == 12);
assert(sizeof(struct subheader_count_subheader_t) == 304);
/* Byte offsets of two fields inside the 32-bit header structure. */
assert(offsetof(struct header_ILP32, timestamp) == 164);
assert(offsetof(struct header_ILP32, release) == 216);
}
/*
 * Example program: write a minimal, single-page sas7bdat file named
 * "machin.sas7bdat" in the current directory.
 *
 * The file consists of the header produced by _sas7bdat_init_header()
 * followed by one page. Subheaders are stacked downwards from the end of
 * the page (ROWSIZE, COLSIZE, SUBHCNT, COLTEXT), and a subheader pointer is
 * recorded for each of them in the page's pointer table, plus one final
 * zero-length pointer.
 *
 * Returns EXIT_SUCCESS when the file was written completely, EXIT_FAILURE
 * on allocation, open, write or close errors.
 */
int main(void) {
    check_sizes();

    /* input:
       - filename
       - vector length
    */
    size_t page_size = PAGE_SIZE; /* sysconf(_SC_PAGESIZE); */
    char filename[] = "machin.sas7bdat";
    int status = EXIT_FAILURE;

    struct sas7bdat_handler_t *dataset = sas7bdat_init();
    if (dataset == NULL) {
        fprintf(stderr, "Error: could not alloc memory.\n");
        return EXIT_FAILURE;
    }
    dataset->filename = filename;

    /* calloc, not malloc: the whole page is written to disk, so every byte
       that is not explicitly set below must be a defined zero. */
    dataset->reader.page = calloc(page_size, sizeof(char));
    if (dataset->reader.page == NULL) {
        fprintf(stderr, "Error: could not alloc memory.\n");
        return EXIT_FAILURE;
    }
    printf("TEST2\n");

    _sas7bdat_init_header(dataset);

    dataset->fp = fopen(filename, "wb");
    if (dataset->fp == NULL) {
        fprintf(stderr, "Error: could not open file %s, %s.\n", filename, strerror(errno));
        goto out_free;
    }
    printf("TEST3\n");

    if (fwrite(&dataset->header, sizeof(struct header_ILP32), 1, dataset->fp) != 1) {
        fprintf(stderr, "Error: could not write header to file %s, %s.\n", filename, strerror(errno));
        goto out_close;
    }

    /* TODO Compute row length */
    int row_length = 0;
    int row_count = 1;
    int column_count = 0;

    /* Write pages */
    dataset->reader.page_n = 1;
    dataset->reader.page->any.type = MIX;
    dataset->reader.page->meta.unknown1[0] = dataset->reader.page_n ^ dataset->header.unknown6_3;
    dataset->reader.page->meta.nb_subheader_pointers = 0;

    /* Byte cursor into the page. It starts one past the end of the page and
       moves down as each subheader is carved out. A char pointer is used
       because arithmetic on void pointers is not part of ISO C. */
    char *subheader = (char *) dataset->reader.page + page_size;

    /* ROW_SIZE */
    subheader -= sizeof(struct row_size_subheader_t);
    struct row_size_subheader_t *row_size = (struct row_size_subheader_t *) subheader;
    row_size->type = SUBH_ROWSIZE;
    row_size->test[0] = 240;
    //row_size->test[1] = 7;
    row_size->row_length = row_length;
    row_size->row_count = row_count;
    row_size->column_count1 = column_count;
    row_size->column_count2 = 0;
    row_size->page_size = page_size;
    row_size->max_row_count = 2808;
    row_size->end_of_header[0] = -1;
    row_size->end_of_header[1] = -1;
    row_size->page_signature[0] = dataset->reader.page->meta.unknown1[0];
    row_size->unknown4 = 1;
    row_size->unknown5 = 2;
    row_size->pages_with_subheaders = 1;
    row_size->subheaders_with_positive_length_on_last_page = 4;
    //row_size->pages_with_subheaders2 = row_size->pages_with_subheaders;
    //row_size->subheaders_with_positive_length_on_last_page2 = 6;
    row_size->page_count = 1;
    row_size->unknown6 = 6;
    row_size->coltext_number = 1;
    /* subheader pointer */
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].offset = subheader - (char *) dataset->reader.page;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].length = sizeof(struct row_size_subheader_t);
    ++dataset->reader.page->meta.nb_subheader_pointers;

    /* COL_SIZE */
    subheader -= sizeof(struct column_size_subheader_t);
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].length = sizeof(struct column_size_subheader_t);
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].offset = subheader - (char *) dataset->reader.page;
    struct column_size_subheader_t *col_size = (struct column_size_subheader_t *) subheader;
    col_size->type = SUBH_COLSIZE;
    col_size->column_count = column_count;
    ++dataset->reader.page->meta.nb_subheader_pointers;

    /* SUBH_CNT */
    subheader -= sizeof(struct subheader_count_subheader_t);
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].length = sizeof(struct subheader_count_subheader_t);
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].offset = subheader - (char *) dataset->reader.page;
    struct subheader_count_subheader_t *sub_cnt = (struct subheader_count_subheader_t *) subheader;
    sub_cnt->type = SUBH_SUBHCNT;
    sub_cnt->offset = 36;
    sub_cnt->unknown = 1L;
    sub_cnt->unknown2 = 7;
    /* One entry per subheader signature; only signature -3 is given a
       non-zero first/last location (page 1, position 4). */
    sub_cnt->subheader_count_vector[0] = (struct subheader_count_vector_t) { .signature = -4, .page1 = 0, .loc1 = 0, .pagel = 0, .locl = 0 };
    sub_cnt->subheader_count_vector[1] = (struct subheader_count_vector_t) { .signature = -3, .page1 = 1, .loc1 = 4, .pagel = 1, .locl = 4 };
    sub_cnt->subheader_count_vector[2] = (struct subheader_count_vector_t) { .signature = -1, .page1 = 0, .loc1 = 0, .pagel = 0, .locl = 0 };
    sub_cnt->subheader_count_vector[3] = (struct subheader_count_vector_t) { .signature = -2, .page1 = 0, .loc1 = 0, .pagel = 0, .locl = 0 };
    sub_cnt->subheader_count_vector[4] = (struct subheader_count_vector_t) { .signature = -5, .page1 = 0, .loc1 = 0, .pagel = 0, .locl = 0 };
    sub_cnt->subheader_count_vector[5] = (struct subheader_count_vector_t) { .signature = -6, .page1 = 0, .loc1 = 0, .pagel = 0, .locl = 0 };
    sub_cnt->subheader_count_vector[6] = (struct subheader_count_vector_t) { .signature = -7, .page1 = 0, .loc1 = 0, .pagel = 0, .locl = 0 };
    ++dataset->reader.page->meta.nb_subheader_pointers;

    /* COLTEXT */
    subheader -= 48;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].length = 48;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].offset = subheader - (char *) dataset->reader.page;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].unknown = 256;
    struct column_text_subheader_t *col_text = (struct column_text_subheader_t *) subheader;
    col_text->type = SUBH_COLTEXT;
    col_text->remaining_length = 48 - 12;
    /* Text should be 4 bytes aligned. strncpy is used on purpose: it
       zero-fills the rest of the 24-byte text area after the string. */
    strncpy(subheader + 12, " DATASTEP", 24);
    ++dataset->reader.page->meta.nb_subheader_pointers;

    /* COLNAME: not written yet. Draft kept for reference. */
    /*subheader -= 28;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].length = 28;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].offset = subheader - (char *) dataset->reader.page;
    struct column_name_subheader_t *col_name = (struct column_name_subheader_t*) (subheader);
    col_name->type = SUBH_COLNAME;
    col_name->remaining_length = 28 - 12;
    col_name->column_name_pointer[0] = (struct column_name_pointer_t) { .index = 0, .offset = 32, .length = 4, .sortedby = 0 };
    ++dataset->reader.page->meta.nb_subheader_pointers;*/

    /* COLATTR: not written yet. TODO: the previous draft here was an
       unmodified copy of the COLNAME draft above (it even used
       SUBH_COLNAME), so it has to be written from scratch. */

    /* Closing entry of the pointer table: zero length, located at the
       current cursor position. */
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].length = 0;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].offset = subheader - (char *) dataset->reader.page;
    dataset->reader.page->meta.subheader_pointers[dataset->reader.page->meta.nb_subheader_pointers].unknown = 1;
    ++dataset->reader.page->meta.nb_subheader_pointers;
    dataset->reader.page->any.nb_data_block = dataset->reader.page->meta.nb_subheader_pointers;

    /* TEST */
    /* FILE *fp = fopen("../datasets/test32.sas7bdat", "rb");
    fseek(fp, 1024, SEEK_SET);
    fread(dataset->reader.page, 20, 1, fp);
    fclose(fp);
    dataset->reader.page->meta.unknown1[0] = dataset->reader.page_n ^ dataset->header.unknown6_3; */
    /* /TEST */

    if (fwrite(dataset->reader.page, page_size, 1, dataset->fp) != 1) {
        fprintf(stderr, "Error: could not write page to file %s, %s.\n", filename, strerror(errno));
        goto out_close;
    }
    ++dataset->reader.page_n;
    status = EXIT_SUCCESS;

    /* finalize */
    //fseek(dataset->fp, 0, SEEK_SET);
    //fwrite(&dataset->header, sizeof(struct header_ILP32), 1, dataset->fp);
out_close:
    /* fclose flushes buffered data, so a failure here means the file on
       disk may be incomplete. */
    if (fclose(dataset->fp) != 0) {
        fprintf(stderr, "Error: could not close file %s, %s.\n", filename, strerror(errno));
        status = EXIT_FAILURE;
    }
    dataset->fp = NULL;
out_free:
    free(dataset->reader.page);
    dataset->reader.page = NULL;
    return status;
}
......@@ -27,6 +27,8 @@
#include "sas7bdat_struct.h"
#include "sas7bdat.h"
#define HEADER_SIZE 1024
/* Read the header and extract its information */
int read_header(struct sas7bdat_handler_t *dataset) {
/* Check Magic number */
......@@ -171,3 +173,64 @@ int read_header(struct sas7bdat_handler_t *dataset) {
return EXIT_SUCCESS;
}
/*
 * Fill dataset->header with the values needed to write a new sas7bdat file:
 * magic number, alignment/word-size/endianness flags, OS bytes, encoding,
 * dataset name, file type, creation timestamps, header and page sizes,
 * release and host strings, and the "check" words derived from the
 * timestamp.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when dataset is NULL.
 */
int _sas7bdat_init_header(struct sas7bdat_handler_t *dataset) {
    static const unsigned char magic[32] = {
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x60,
        0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00,
        0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f, 0x10, 0x11
    };

    if (dataset == NULL) {
        return EXIT_FAILURE;
    }

    memcpy(&dataset->header.magic, magic, sizeof magic);

    dataset->header.align[0] = '\x22';
    dataset->header.align[1] = '\x22';
    dataset->header.align[2] = '\x00';
    dataset->header.align[3] = '\x32'; /* Alignment of timestamp: 0/+4 */
    dataset->header.word_size = '\x22'; /* Word size : 32bits */
#ifdef WORDS_BIGENDIAN /* Endianness */
    dataset->header.endianness = '\x00';
#else
    dataset->header.endianness = '\x01';
#endif

    /* The Windows-style OS bytes are always written; the alternative is
       kept below for reference. */
    //#if defined _WIN64 || defined _WIN32
    memcpy(&dataset->header.OS_family, "\0022\004\0\0\0\0\0\0\0\0\0\003\001", 14);
    /*#else
    memcpy(&dataset->header.OS_family, "\0021\001\0\0\0\0\0\0\0\0\0\002\001", 14);
    #endif*/

    /* NOTE(review): the two "repeat" copies below take their source from a
       pointer stepped backwards out of a neighbouring field, so they depend
       on the exact packed layout of the header structure - confirm against
       the struct definition. */
    memcpy(&dataset->header.repeat, dataset->header.align - 4, 13);
    memcpy(&dataset->header.unknown3, "2\001\"\"\0", 5);
    dataset->header.encoding = 62; /* ascii */
    dataset->header.unknown32 = 0x1000;
    memcpy(&dataset->header.repeat2, dataset->header.repeat - 2, 2);

    memcpy(&dataset->header.SASFILE, "SAS FILE", 8);
    /* strncpy zero-fills the remainder of the fixed-size name field.
       NOTE(review): the format may expect space padding here - confirm. */
    strncpy(dataset->header.dataset_name, "MACHIN ", sizeof(dataset->header.dataset_name));
    /* Exactly 8 bytes are copied, so the literal is space-padded to 8
       characters; a shorter literal would be read past its end. */
    memcpy(&dataset->header.file_type, "DATA    ", 8);

    time_t now = time(NULL);
    dataset->header.timestamp[0] = write_SAS_time(now);
    dataset->header.timestamp[1] = dataset->header.timestamp[0];

    /* Difference in seconds between "now" and its UTC broken-down time
       re-read as local time; 0 when gmtime() cannot convert. */
    struct tm *utc = gmtime(&now);
    dataset->header.unknown4_1[0] = (utc != NULL) ? difftime(now, mktime(utc)) : 0;
    dataset->header.unknown4_1[1] = dataset->header.unknown4_1[0];

    dataset->header.header_size = HEADER_SIZE;
    dataset->header.page_size = 4096;
    dataset->header.page_count = 1;
    strncpy(dataset->header.release, "9.0101M3", sizeof(dataset->header.release));
    strncpy(dataset->header.host, "XP_PRO", sizeof(dataset->header.host));

    /* check[0] is the first int-sized chunk of the timestamp's bytes. It is
       read with memcpy rather than through an int pointer cast, which would
       break strict aliasing and could be a misaligned access. */
    int timestamp_bits;
    memcpy(&timestamp_bits, dataset->header.timestamp, sizeof timestamp_bits);
    dataset->header.check[0] = timestamp_bits;
    dataset->header.check[1] = dataset->header.check[0] ^ 0x656f4e4c;
    dataset->header.check[2] = dataset->header.check[1];
    dataset->header.check[3] = dataset->header.check[1];
    dataset->header.unknown6_3 = 1166162672;
    memcpy(dataset->header.timestamp2, dataset->header.timestamp, 8);

    return EXIT_SUCCESS;
}
......@@ -88,5 +88,6 @@ struct header_ILP32 {
#include "sas7bdat.h"
/* Read the file header of the dataset and extract its information.
   Returns EXIT_SUCCESS on success. */
int read_header(SAS7BDAT*);
/* Fill the dataset's header with the values used when writing a new file.
   Returns EXIT_SUCCESS. */
int _sas7bdat_init_header(SAS7BDAT*);
#endif /* SAS7BDAT_HEADER_H */
......@@ -7,6 +7,9 @@ EXEC=libsas7bdat
# "all" builds the $(EXEC) executable.
all: $(EXEC)
# "write" links the library objects with the file-writing example.
write: $(OBJ) examples/write.o
$(CC) -o $@ $^ $(LDFLAGS)
# $(EXEC) links the library objects with the test example.
$(EXEC): $(OBJ) examples/test.o
$(CC) -o $@ $^ $(LDFLAGS)
......
/*
libsas7bdat: minimalist library to read sas7bdat files
Copyright (C) 2012 Bertrand Marc
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdlib.h>
#include "subheader.h"
#define PAGE_SIZE 4096
/*
 * Build a ROWSIZE subheader by value.
 *
 * row_length, row_count, column_count and page_size are stored in the
 * corresponding fields; column_count2 is 0 and both end_of_header words are
 * -1. Every field not listed in the initializer is zero.
 *
 * max_row_count is page_size / row_count, or 0 when row_count is 0 (integer
 * division by zero is undefined behaviour).
 * NOTE(review): dividing by row_count looks suspicious - the number of rows
 * that fit in a page would normally depend on row_length. Confirm the
 * intended formula.
 */
struct row_size_subheader_t _sas7bdat_init_row_size_subheader(int row_length, int row_count, int column_count, int page_size) {
    struct row_size_subheader_t sh = {
        .type = SUBH_ROWSIZE,
        .row_length = row_length,
        .row_count = row_count,
        .column_count1 = column_count,
        .column_count2 = 0,
        .page_size = page_size,
        .max_row_count = (row_count != 0) ? page_size / row_count : 0,
        .end_of_header = { -1, -1 }
    };
    return sh;
}
struct column_size_subheader_t _sas7bdat_init_column_size_subheader(int column_count) {
struct column_size_subheader_t subh = { .type = SUBH_COLSIZE, .column_count = column_count };
return subh;
}
/*
 * Allocate a COLATTR subheader with room for column_count attribute entries
 * after the fixed part. The memory is zero-filled and the type is set to
 * SUBH_COLATTR; the attribute entries are left for the caller to fill.
 *
 * Returns NULL when column_count is negative, when the requested size would
 * overflow, or when the allocation fails. The caller owns the returned
 * pointer and releases it with free().
 */
struct column_attributes_subheader_t* _sas7bdat_init_column_attributes_subheader(int column_count) {
    if (column_count < 0) {
        return NULL;
    }

    size_t entries = (size_t) column_count;
    /* Reject sizes whose computation would wrap around. */
    if (entries > ((size_t) -1 - sizeof(struct column_attributes_subheader_t)) / sizeof(struct column_attributes_t)) {
        return NULL;
    }

    struct column_attributes_subheader_t *subh = calloc(1, sizeof *subh + entries * sizeof(struct column_attributes_t));
    if (subh == NULL) {
        return NULL;
    }
    subh->type = SUBH_COLATTR;
    /* subh->column_attributes[0].offset subh->column_attributes[0].width */
    return subh;
}
struct column_name_subheader_t* _sas7bdat_init_column_name_subheader(int column_count) {
struct column_name_subheader_t *subh = malloc(sizeof(struct column_attributes_subheader_t) + column_count * sizeof(struct column_attributes_t));
subh->type = SUBH_COLNAME;
/* subh->column_name_pointer[0].offset subh->column_name_pointer[0].width */
return subh;
}
......@@ -86,7 +86,8 @@ struct row_size_subheader_t {
short zeros4[4];
short unknown10[5];
int unknown11[9];
short unknown12[5];
short unknown12[2];
short coltext_number, MXNAM, MXLAB;
short zeros5[6];
short data_rows_on_full_page;
int todo2[7];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment