Commit 72127722 authored by Daniel Kochmański's avatar Daniel Kochmański

Merge branch 'stream-fixes' into 'develop'

ANSI Streams: Document extensions and fix various bugs

See merge request !140
parents 41eb59ab 44f460a7
Pipeline #51275882 passed with stage
......@@ -345,7 +345,7 @@ generic_read_byte_signed8(cl_object strm)
static void
generic_write_byte_signed8(cl_object byte, cl_object strm)
{
signed char c = fixint(byte);
signed char c = ecl_to_int8_t(byte);
strm->stream.ops->write_byte8(strm, (unsigned char *)&c, 1);
}
......@@ -808,9 +808,9 @@ ucs_4_encoder(cl_object stream, unsigned char *buffer, ecl_character c)
{
stream->stream.decoder = ucs_4be_decoder;
stream->stream.encoder = ucs_4be_encoder;
buffer[0] = 0xFF;
buffer[1] = 0xFE;
buffer[2] = buffer[3] = 0;
buffer[0] = buffer[1] = 0;
buffer[2] = 0xFE;
buffer[3] = 0xFF;
return 4 + ucs_4be_encoder(stream, buffer+4, c);
}
......@@ -929,8 +929,8 @@ ucs_2_encoder(cl_object stream, unsigned char *buffer, ecl_character c)
{
stream->stream.decoder = ucs_2be_decoder;
stream->stream.encoder = ucs_2be_encoder;
buffer[0] = 0xFF;
buffer[1] = 0xFE;
buffer[0] = 0xFE;
buffer[1] = 0xFF;
return 2 + ucs_2be_encoder(stream, buffer+2, c);
}
......@@ -4393,9 +4393,7 @@ make_sequence_input_stream(cl_object vector, cl_index istart, cl_index iend,
int byte_size;
int flags = 0;
if (!ECL_VECTORP(vector) ||
((type = ecl_array_elttype(vector)) < ecl_aet_b8 &&
type > ecl_aet_bc) ||
ecl_aet_size[type] != 1)
ecl_aet_size[type = ecl_array_elttype(vector)] != 1)
{
FEerror("MAKE-SEQUENCE-INPUT-STREAM only accepts vectors whose element has a size of 1 byte.~%~A", 1, vector);
}
......@@ -4406,29 +4404,12 @@ make_sequence_input_stream(cl_object vector, cl_index istart, cl_index iend,
strm = alloc_stream();
strm->stream.ops = duplicate_dispatch_table(&seq_in_ops);
strm->stream.mode = (short)ecl_smm_sequence_input;
if (!byte_size) {
#if defined(ECL_UNICODE)
if (ECL_BASE_STRING_P(vector)) {
if (Null(external_format))
external_format = @':default';
} else {
if (Null(external_format)) {
# ifdef WORDS_BIGENDIAN
external_format = @':ucs-4be';
# else
external_format = @':ucs-4le';
# endif
}
}
#else
if (Null(external_format)) {
external_format = @':default';
}
#endif
if (!byte_size && Null(external_format)) {
external_format = @':default';
}
set_stream_elt_type(strm, byte_size, flags, external_format);
/* Override byte size and elt type */
if (byte_size) strm->stream.byte_size = byte_size;
/* Override byte size */
if (byte_size) strm->stream.byte_size = 8;
SEQ_INPUT_VECTOR(strm) = vector;
SEQ_INPUT_POSITION(strm) = istart;
SEQ_INPUT_LIMIT(strm) = iend;
......@@ -4462,6 +4443,9 @@ seq_out_write_byte8(cl_object strm, unsigned char *c, cl_index n)
cl_fixnum delta = last - curr_pos;
if (delta < n) {
/* Not enough space, enlarge */
if (!ECL_ADJUSTABLE_ARRAY_P(vector)) {
FEerror("Can't adjust the dimensions of the sequence of sequence stream ~A", 1, strm);
}
vector = _ecl_funcall3(@'adjust-array', vector,
ecl_ash(ecl_make_fixnum(last), 1));
SEQ_OUTPUT_VECTOR(strm) = vector;
......@@ -4540,9 +4524,7 @@ make_sequence_output_stream(cl_object vector, cl_object external_format)
int byte_size;
int flags = 0;
if (!ECL_VECTORP(vector) ||
((type = ecl_array_elttype(vector)) < ecl_aet_b8 &&
type > ecl_aet_bc) ||
ecl_aet_size[type] != 1)
ecl_aet_size[type = ecl_array_elttype(vector)] != 1)
{
FEerror("MAKE-SEQUENCE-OUTPUT-STREAM only accepts vectors whose element has a size of 1 byte.~%~A", 1, vector);
}
......@@ -4553,29 +4535,12 @@ make_sequence_output_stream(cl_object vector, cl_object external_format)
strm = alloc_stream();
strm->stream.ops = duplicate_dispatch_table(&seq_out_ops);
strm->stream.mode = (short)ecl_smm_sequence_output;
if (!byte_size) {
#if defined(ECL_UNICODE)
if (ECL_BASE_STRING_P(vector)) {
if (Null(external_format))
external_format = @':default';
} else {
if (Null(external_format)) {
# ifdef WORDS_BIGENDIAN
external_format = @':ucs-4be';
# else
external_format = @':ucs-4le';
# endif
}
}
#else
if (Null(external_format)) {
external_format = @':default';
}
#endif
if (!byte_size && Null(external_format)) {
external_format = @':default';
}
set_stream_elt_type(strm, byte_size, flags, external_format);
/* Override byte size and elt type */
if (byte_size) strm->stream.byte_size = byte_size;
/* Override byte size */
if (byte_size) strm->stream.byte_size = 8;
SEQ_OUTPUT_VECTOR(strm) = vector;
SEQ_OUTPUT_POSITION(strm) = vector->vector.fillp;
return strm;
......
......@@ -1791,7 +1791,7 @@ cl_symbols[] = {
{EXT_ "*ACTION-ON-UNDEFINED-VARIABLE*", EXT_SPECIAL, NULL, -1, ECL_NIL},
{SYS_ "SET-BUFFERING-MODE", SI_ORDINARY, si_set_buffering_mode, 2, OBJNULL},
{EXT_ "SET-BUFFERING-MODE", EXT_ORDINARY, si_set_buffering_mode, 2, OBJNULL},
{KEY_ "NONE", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "LINE-BUFFERED", KEYWORD, NULL, -1, OBJNULL},
{KEY_ "FULLY-BUFFERED", KEYWORD, NULL, -1, OBJNULL},
......
......@@ -1791,7 +1791,7 @@ cl_symbols[] = {
{EXT_ "*ACTION-ON-UNDEFINED-VARIABLE*",NULL},
{SYS_ "SET-BUFFERING-MODE","si_set_buffering_mode"},
{EXT_ "SET-BUFFERING-MODE","si_set_buffering_mode"},
{KEY_ "NONE",NULL},
{KEY_ "LINE-BUFFERED",NULL},
{KEY_ "FULLY-BUFFERED",NULL},
......
......@@ -55,14 +55,14 @@ and the table of known symbols is shown below. Note how some symbols (@code{:cr}
@item @code{:cr} @tab @code{#\Newline} is @code{Carriage Return} @tab No
@item @code{:crlf} @tab @code{#\Newline} is @code{Carriage Return} followed by @code{Linefeed} @tab No
@item @code{:lf} @tab @code{#\Newline} is @code{Linefeed} @tab No
@item @code{:little-endian} @tab Modify UCS to use little endian encoding. @tab No
@item @code{:big-endian} @tab Modify UCS to use big endian encoding. @tab No
@item @code{:little-endian} @tab Modify UCS to use little-endian encoding. @tab No
@item @code{:big-endian} @tab Modify UCS to use big-endian encoding. @tab No
@item @code{:utf-8 :utf8} @tab Unicode UTF-8 @tab Yes
@item @code{:ucs-2 :ucs2 :utf-16 :utf16 :unicode} @tab UCS-2 encoding with BOM. @tab Yes
@item @code{:ucs-2le :ucs2le :utf-16le} @tab UCS-2 with big-endian encoding @tab Yes
@item @code{:ucs-2 :ucs2 :utf-16 :utf16 :unicode} @tab UCS-2 encoding with BOM. Defaults to big-endian when writing or if no BOM is detected when reading. @tab Yes
@item @code{:ucs-2le :ucs2le :utf-16le} @tab UCS-2 with little-endian encoding @tab Yes
@item @code{:ucs-2be :ucs2be :utf-16be} @tab UCS-2 with big-endian encoding @tab Yes
@item @code{:ucs-4 :ucs4 :utf-32 :utf32} @tab UCS-4 encoding with BOM. @tab Yes
@item @code{:ucs-4le :ucs4le :utf-32le} @tab UCS-4 with big-endian encoding @tab Yes
@item @code{:ucs-4 :ucs4 :utf-32 :utf32} @tab UCS-4 encoding with BOM. Defaults to big-endian when writing or if no BOM is detected when reading. @tab Yes
@item @code{:ucs-4le :ucs4le :utf-32le} @tab UCS-4 with little-endian encoding @tab Yes
@item @code{:ucs-4be :ucs4be :utf-32be} @tab UCS-4 with big-endian encoding @tab Yes
@item @code{:iso-8859-1 :iso8859-1 :latin-1 :cp819 :ibm819} @tab Latin-1 encoding @tab Yes
@item @code{:iso-8859-2 :iso8859-2 :latin-2 :latin2} @tab Latin-2 encoding @tab Yes
......@@ -107,21 +107,62 @@ and the table of known symbols is shown below. Note how some symbols (@code{:cr}
@node Streams - Dictionary
@subsection Dictionary
@lspindex open
@lspindex si:set-buffering-mode
@defun open filename &key (direction :input) (element-type character) if-exists if-does-not-exist (external-format :default) (cstream t)
Open a file stream
@subsubheading Synopsis
@table @var
@item cstream
A generalized boolean; If true, internally use a C @code{FILE*} stream, else a POSIX file descriptor
@end table
@subsubsection Sequence Streams
@lspindex ext:sequence-stream
@deftp {System Class} ext:sequence-stream
@subsubheading Class Precedence List
@code{ext:sequence-stream, stream, t}
@subsubheading Description
All keyword arguments except @var{cstream} behave as specified by the ANSI standard @bibcite{ANSI}.
Sequence streams work similarly to string streams, but operate on
vectors. The supplied vectors that the streams read from or write to
must have a byte-sized element type, i.e. @code{(signed-byte 8)},
@code{(unsigned-byte 8)} or @code{base-char}.
The semantics depend on the vector element type and the external
format of the stream. If no external format is supplied and the
element type is an integer type, the stream is a binary stream and
accepts only integers of the same type as the element type of the
vector. Otherwise, the stream accepts both characters and integers and
converts them using the given external format. If the element type is
@code{base-char}, the elements of the vectors are treated as bytes.
This means that writing a character may use multiple elements of the
vector, whose @code{char-code}s will be equal to the values of the
bytes comprising the character in the given external format.
@end deftp
@lspindex ext:make-sequence-input-stream
@defun ext:make-sequence-input-stream vector &key (start 0) (end nil) (external-format nil)
Create a sequence input stream with the subsequence bounded by
@var{start} and @var{end} of the given vector.
@end defun
@lspindex ext:make-sequence-output-stream
@defun ext:make-sequence-output-stream vector &key (external-format nil)
Create a sequence output stream.
@end defun
@exindex Using sequence streams
Example:
Using sequence streams to convert to a UTF8 encoded base string
@lisp
CL-USER> (defvar *output* (make-array 20 :element-type 'base-char :adjustable t :fill-pointer 0))
*OUTPUT*
CL-USER> (defvar *stream* (ext:make-sequence-output-stream *output* :external-format :utf-8))
*STREAM*
CL-USER> (write-string "Spätzle mit Soß'" *stream*)
"Spätzle mit Soß'"
CL-USER> *output*
"Spätzle mit SoÃ\237'"
@end lisp
@subsubsection File Stream Extensions
@defun si:set-buffering-mode stream mode
@lspindex ext:set-buffering-mode
@defun ext:set-buffering-mode stream mode
Control the buffering mode of a stream
@subsubheading Synopsis
@table @var
......@@ -129,11 +170,140 @@ Control the buffering mode of a stream
an ANSI stream
@item mode
one of @code{nil}, @code{:none}, @code{:line}, @code{:line-buffered}, @code{:full} or @code{:fully-buffered}
@item returns
The supplied stream
@end table
@subsubheading Description
If @var{mode} is @code{nil} or @code{:none}, @var{stream} will not be buffered, if it is @code{:line} or @code{:line-buffered} resp. @code{:full} or @code{:full-buffered}, @var{stream} will be line resp. fully buffered. Streams created with the @code{:cstream} argument to @code{open} set to @code{nil} will never be buffered.
If @var{mode} is @code{nil} or @code{:none}, @var{stream} will not be
buffered. If it is @code{:line} or @code{:line-buffered}, @var{stream}
will be line buffered. If it is @code{:full} or @code{:fully-buffered},
@var{stream} will be fully buffered. If the stream does not support
buffering, nothing will happen.
@end defun
@lspindex ext:file-stream-fd
@defun ext:file-stream-fd file-stream
Return the POSIX file descriptor of @var{file-stream} as an integer
@end defun
@subsubsection External Format Extensions
@lspindex ext:all-encodings
@defun ext:all-encodings
Return a list of all supported external formats
@end defun
@lspindex ext:character-coding-error
@deftp Condition ext:character-coding-error
Character coding error
@subsubheading Class Precedence List
@code{ext:character-coding-error, error, serious-condition, condition, t}
@subsubheading Methods
@lspindex ext:character-coding-error-external-format
@defun ext:character-coding-error-external-format condition
@table @var
@item returns
The external format of @var{condition}
@end table
@end defun
@subsubheading Description
Superclass of @code{ext:character-encoding-error} and @code{ext:character-decoding-error}.
@end deftp
@lspindex ext:character-encoding-error
@deftp Condition ext:character-encoding-error
Character encoding error
@subsubheading Class Precedence List
@code{ext:character-encoding-error, ext:character-coding-error, error, serious-condition, condition, t}
@subsubheading Methods
@lspindex ext:character-encoding-error-code
@defun ext:character-encoding-error-code condition
@table @var
@item returns
The character code of the character that can't be encoded
@end table
@end defun
@subsubheading Description
Condition for characters that can't be encoded with some external
format.
@end deftp
@lspindex ext:character-decoding-error
@deftp Condition ext:character-decoding-error
Character decoding error
@subsubheading Class Precedence List
@code{ext:character-decoding-error, ext:character-coding-error, error, serious-condition, condition, t}
@subsubheading Methods
@lspindex ext:character-decoding-error-octets
@defun ext:character-decoding-error-octets condition
@table @var
@item returns
A list of integers with the values of the @code{unsigned char}s that
can't be decoded.
@end table
@end defun
@subsubheading Description
Condition for characters that can't be decoded with some external
format.
@end deftp
@lspindex ext:stream-encoding-error
@deftp Condition ext:stream-encoding-error
Stream encoding error
@subsubheading Class Precedence List
@code{ext:stream-encoding-error, ext:character-encoding-error, ext:character-coding-error, stream-error, error, serious-condition, condition, t}
@subsubheading Description
This condition is signaled when a character being written to a stream
can't be encoded with the stream's external format.
@end deftp
@lspindex ext:stream-decoding-error
@deftp Condition ext:stream-decoding-error
Stream decoding error
@subsubheading Class Precedence List
@code{ext:stream-decoding-error, ext:character-decoding-error, ext:character-coding-error, stream-error, error, serious-condition, condition, t}
@subsubheading Description
This condition is signaled when a character being read from a stream
can't be decoded with the stream's external format.
@end deftp
@lspindex ext:encoding-error
@defun ext:encoding-error stream external-format code
Signal an @code{ext:stream-encoding-error} with the given
@var{external-format} and @var{code}. Make a restart available so
that the error can be ignored or the character can be replaced with a
different one.
@end defun
@lspindex ext:decoding-error
@defun ext:decoding-error stream external-format octets
Signal an @code{ext:stream-decoding-error} with the given
@var{external-format} and @var{octets}. Make a restart available so
that the error can be ignored or the octets can be replaced with a
character.
@end defun
@node Streams - C Reference
@subsection C Reference
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment