Commit c00837c2 authored by Petr Machata

Support wide character strings

- "string" lens and "format" pack were extended such that using an
  integer as underlying array type denotes a wide character string.

- several prototypes from wchar.h were added to libc.so.conf.

- ltrace.conf.5 was updated
parent 0ffc085b
......@@ -127,6 +127,13 @@
| void func(int*, int*, +long*, long*); |
| void func(in int*, in int*, out long*, out long*); |
This is useful in particular for:
| ulong mbsrtowcs(+string(array(uint, zero(arg3))*), string*, ulong, addr); |
Where we would like to render arg2 on the way in, and arg1 on the
way out.
But sometimes we may want to see a different type on the way in and
on the way out. E.g. in asprintf, what's interesting on the way in
is the address, but on the way out we want to see buffer contents.
......
......@@ -43,6 +43,7 @@ string tgoto(string, int, int);
# POSIX always uses pointer to the structure, so it's fine.
typedef DIR = struct(int);
typedef FILE = addr;
# XXX We can't represent the following portably without having either
# uulong, or directly uint64_t.
......@@ -400,7 +401,36 @@ addr getutent();
void setutent();
# wchar.h
int fwide(addr, int);
typedef wchar_t = string(uint);
typedef wint_t = string(int);
typedef wstring_t = string(array(uint, zero)*);
int fwide(FILE*, int);
wint_t btowc(int);
wint_t fgetwc(FILE*);
wstring_t fgetws(+string(array(uint, zero(arg2))*), int, FILE*);
wint_t fputwc(wchar_t, FILE*);
int fputws(wstring_t, FILE*);
int fwprintf(FILE*, format(wstring_t));
; int fwscanf(FILE *restrict, const wchar_t *restrict, ...);
wint_t getwc(FILE *);
wint_t getwchar();
int iswalnum(wint_t);
int iswalpha(wint_t);
int iswcntrl(wint_t);
; int iswctype(wint_t, wctype_t);
int iswdigit(wint_t);
int iswgraph(wint_t);
int iswlower(wint_t);
int iswprint(wint_t);
int iswpunct(wint_t);
int iswspace(wint_t);
int iswupper(wint_t);
int iswxdigit(wint_t);
ulong mbrlen(string, ulong, addr);
ulong mbrtowc(+wchar_t*, string[arg3], ulong, addr);
int mbsinit(addr);
# The destination is a pointer to a wide-character buffer, hence
# array(...)* rather than a plain array.  It is rendered on the way out.
ulong mbsrtowcs(+string(array(uint, zero(arg3))*), string*, ulong, addr);
# sys/wait.h
int wait(addr);
......
......@@ -123,13 +123,8 @@ acc_fprintf(int *countp, FILE *stream, const char *format, ...)
}
static int
format_char(FILE *stream, struct value *value, struct value_dict *arguments)
print_char(FILE *stream, int c)
{
long lc;
if (value_extract_word(value, &lc, arguments) < 0)
return -1;
int c = (int)lc;
const char *fmt;
switch (c) {
case -1:
......@@ -173,13 +168,23 @@ format_char(FILE *stream, struct value *value, struct value_dict *arguments)
}
static int
format_naked_char(FILE *stream, struct value *value,
struct value_dict *arguments)
format_char(FILE *stream, struct value *value, struct value_dict *arguments)
{
long lc;
if (value_extract_word(value, &lc, arguments) < 0)
return -1;
return print_char(stream, (int) lc);
}
static int
format_naked(FILE *stream, struct value *value,
struct value_dict *arguments,
int (*what)(FILE *, struct value *, struct value_dict *))
{
int written = 0;
if (acc_fprintf(&written, stream, "'") < 0
|| account_output(&written,
format_char(stream, value, arguments)) < 0
what(stream, value, arguments)) < 0
|| acc_fprintf(&written, stream, "'") < 0)
return -1;
......@@ -339,7 +344,7 @@ done:
* OPEN, CLOSE, DELIM are opening and closing parenthesis and element
* delimiter.
*/
int
static int
format_array(FILE *stream, struct value *value, struct value_dict *arguments,
struct expr_node *length, size_t maxlen, int before,
const char *open, const char *close, const char *delim)
......@@ -407,7 +412,8 @@ toplevel_format_lens(struct lens *lens, FILE *stream,
case ARGTYPE_CHAR:
if (int_fmt == INT_FMT_default)
return format_naked_char(stream, value, arguments);
return format_naked(stream, value, arguments,
&format_char);
return format_integer(stream, value, int_fmt, arguments);
case ARGTYPE_FLOAT:
......@@ -542,6 +548,47 @@ struct lens bool_lens = {
.format_cb = bool_lens_format_cb,
};
static int
redispatch_as_array(struct lens *lens, FILE *stream,
struct value *value, struct value_dict *arguments,
int (*cb)(struct lens *, FILE *,
struct value *, struct value_dict *))
{
struct arg_type_info info[2];
type_init_array(&info[1], value->type->u.ptr_info.info, 0,
expr_node_zero(), 0);
type_init_pointer(&info[0], &info[1], 0);
info->lens = lens;
info->own_lens = 0;
struct value tmp;
if (value_clone(&tmp, value) < 0)
return -1;
value_set_type(&tmp, info, 0);
int ret = cb(lens, stream, &tmp, arguments);
type_destroy(&info[0]);
type_destroy(&info[1]);
value_destroy(&tmp);
return ret;
}
/* Format VALUE as a single wide character on STREAM.
 *
 * The word extracted from VALUE is converted with wctomb to the
 * multibyte encoding of the current locale.  A character that
 * converts to exactly one byte is handed to print_char; a longer
 * sequence is written to STREAM verbatim.
 *
 * Returns -1 if the value cannot be extracted, cannot be converted,
 * or cannot be written.  Otherwise returns the result of print_char
 * for single-byte characters, and 1 for multibyte ones.  */
static int
format_wchar(FILE *stream, struct value *value, struct value_dict *arguments)
{
	long l;
	if (value_extract_word(value, &l, arguments) < 0)
		return -1;

	wchar_t wc = (wchar_t) l;
	/* One extra byte for the terminating NUL added below.  */
	char buf[MB_CUR_MAX + 1];

	int c = wctomb(buf, wc);
	if (c < 0)
		return -1;

	/* Go through unsigned char: where plain char is signed, a
	 * byte with the high bit set would otherwise be sign-extended
	 * and e.g. 0xff would reach print_char as -1.  */
	if (c == 1)
		return print_char(stream, (unsigned char) buf[0]);

	buf[c] = 0;
	return fprintf(stream, "%s", buf) >= 0 ? 1 : -1;
}
static int
string_lens_format_cb(struct lens *lens, FILE *stream,
......@@ -554,39 +601,39 @@ string_lens_format_cb(struct lens *lens, FILE *stream,
* I suspect people are so used to the char * C idiom,
* that string(char *) might actually turn up. So
* let's just support it. */
if (value->type->u.ptr_info.info->type == ARGTYPE_CHAR) {
struct arg_type_info info[2];
type_init_array(&info[1],
value->type->u.ptr_info.info, 0,
expr_node_zero(), 0);
type_init_pointer(&info[0], &info[1], 0);
info->lens = lens;
info->own_lens = 0;
struct value tmp;
if (value_clone(&tmp, value) < 0)
return -1;
value_set_type(&tmp, info, 0);
int ret = string_lens_format_cb(lens, stream, &tmp,
arguments);
type_destroy(&info[0]);
type_destroy(&info[1]);
value_destroy(&tmp);
return ret;
}
/* fall-through */
switch ((int) value->type->u.ptr_info.info->type)
case ARGTYPE_CHAR:
case ARGTYPE_SHORT:
case ARGTYPE_USHORT:
case ARGTYPE_INT:
case ARGTYPE_UINT:
case ARGTYPE_LONG:
case ARGTYPE_ULONG:
return redispatch_as_array(lens, stream, value,
arguments,
&string_lens_format_cb);
/* Otherwise dispatch to whatever the default for the
* pointee is--most likely this will again be us. */
/* Fall through. */
case ARGTYPE_VOID:
case ARGTYPE_FLOAT:
case ARGTYPE_DOUBLE:
case ARGTYPE_STRUCT:
return toplevel_format_lens(lens, stream, value,
arguments, INT_FMT_default);
case ARGTYPE_SHORT:
case ARGTYPE_INT:
case ARGTYPE_LONG:
case ARGTYPE_USHORT:
case ARGTYPE_UINT:
case ARGTYPE_ULONG:
return toplevel_format_lens(lens, stream, value,
arguments, INT_FMT_default);
if (value->parent != NULL && value->type->lens == NULL)
return format_wchar(stream, value, arguments);
else
return format_naked(stream, value, arguments,
&format_wchar);
case ARGTYPE_CHAR:
return format_char(stream, value, arguments);
......
......@@ -21,10 +21,11 @@
#include "config.h"
#include <limits.h>
#include <sys/param.h>
#include <sys/wait.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
......@@ -99,7 +100,10 @@ normal_exit(void)
}
void
ltrace_init(int argc, char **argv) {
ltrace_init(int argc, char **argv)
{
setlocale(LC_ALL, "");
struct opt_p_t *opt_p_tmp;
atexit(normal_exit);
......
.\" -*-nroff-*-
.\" Copyright (c) 2012 Petr Machata, Red Hat Inc.
.\" Copyright (c) 2012, 2013 Petr Machata, Red Hat Inc.
.\" Copyright (c) 1997-2005 Juan Cespedes <[email protected]>
.\"
.\" This program is free software; you can redistribute it and/or
......@@ -171,13 +171,16 @@ pointer to 256-bit bit vector.
.RS
The first form of the argument is canonical, the latter two are
syntactic sugar. In the canonical form, the function argument is
formatted as string. The \fITYPE\fR shall be either a \fBchar*\fR, or
\fBarray(char,\fIEXPR\fB)\fR, or \fBarray(char,\fIEXPR\fB)*\fR. If an
array is given, the length will typically be a \fBzero\fR expression
(but doesn't have to be). Using argument that is plain array
(i.e. not a pointer to array) makes sense e.g. in C structs, in cases
like \fBstruct(string(array(char, \fR6\fB)))\fR, which describes the C
type \fBstruct {char \fRs\fB[\fR6\fB];}\fR.
formatted as string. The \fITYPE\fR can have any of the following
forms: \fIX\fB*\fR, or \fBarray(\fIX\fB,\fIEXPR\fB)\fR, or
\fBarray(\fIX\fB,\fIEXPR\fB)*\fR. \fIX\fR is either \fBchar\fR for
normal strings, or an integer type for wide-character strings.
If an array is given, the length will typically be a \fBzero\fR
expression (but doesn't have to be). Using an argument that is a plain
array (i.e. not a pointer to an array) makes sense e.g. in C structs, in
cases like \fBstruct(string(array(char, \fR6\fB)))\fR, which describes
the C type \fBstruct {char \fRs\fB[\fR6\fB];}\fR.
Because simple C-like strings are pretty common, there are two
shorthand forms. The first shorthand form (with brackets) means the
......
......@@ -22,7 +22,9 @@
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "printf.h"
#include "type.h"
......@@ -39,6 +41,7 @@ struct param_enum {
char *format;
char const *ptr;
char const *end;
size_t width;
};
static struct param_enum *
......@@ -47,12 +50,30 @@ param_printf_init(struct value *cb_args, size_t nargs,
{
assert(nargs == 1);
/* We expect a char array pointer. */
struct process *proc = cb_args[0].inferior;
assert(proc != NULL);
/* We expect a pointer to array. */
if (cb_args->type->type != ARGTYPE_POINTER
|| cb_args->type->u.ptr_info.info->type != ARGTYPE_ARRAY
|| (cb_args->type->u.ptr_info.info->u.array_info.elt_type->type
!= ARGTYPE_CHAR))
|| cb_args->type->u.ptr_info.info->type != ARGTYPE_ARRAY)
return NULL;
/* The element type should be either character (for narrow
* strings) or an integral type (for wide strings). */
struct arg_type_info *et
= cb_args->type->u.ptr_info.info->u.array_info.elt_type;
switch (et->type) {
case ARGTYPE_CHAR:
case ARGTYPE_SHORT:
case ARGTYPE_USHORT:
case ARGTYPE_INT:
case ARGTYPE_UINT:
case ARGTYPE_LONG:
case ARGTYPE_ULONG:
break;
default:
return NULL;
}
struct param_enum *self = malloc(sizeof(*self));
if (self == NULL) {
......@@ -60,10 +81,12 @@ param_printf_init(struct value *cb_args, size_t nargs,
free(self);
return NULL;
}
self->width = type_sizeof(proc, et);
if (self->width == (size_t) -1)
goto fail;
if (value_init_deref(&self->array, cb_args) < 0)
goto fail;
assert(self->array.type->type == ARGTYPE_ARRAY);
self->format = (char *)value_get_data(&self->array, arguments);
......@@ -189,14 +212,29 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop,
size_t len_buf_len = 0;
struct lens *lens = NULL;
for (; self->ptr < self->end; ++self->ptr) {
for (; self->ptr < self->end; self->ptr += self->width) {
union {
uint8_t u8;
uint16_t u16;
uint32_t u32;
uint64_t u64;
char buf[0];
} u;
memcpy(u.buf, self->ptr, self->width);
switch (self->width) {
case 1: u.u64 = u.u8; break;
case 2: u.u64 = u.u16; break;
case 4: u.u64 = u.u32; break;
}
uint64_t c = u.u64;
if (!self->percent) {
if (*self->ptr == '%')
if (c == '%')
self->percent = 1;
continue;
}
switch (*self->ptr) {
switch (c) {
case '#': case ' ': case '-':
case '+': case 'I': case '\'':
/* These are only important for formatting,
......@@ -214,7 +252,7 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop,
= malloc(sizeof(*self->future_length));
if (self->future_length != NULL) {
++self->ptr;
self->ptr += self->width;
format_type = ARGTYPE_INT;
break;
}
......@@ -227,7 +265,7 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop,
* this to attach the appropriate string
* length expression. */
if (len_buf_len < sizeof(len_buf) - 1)
len_buf[len_buf_len++] = *self->ptr;
len_buf[len_buf_len++] = c;
continue;
case 'h':
......@@ -299,8 +337,7 @@ param_printf_next(struct param_enum *self, struct arg_type_info *infop,
lng++;
case 's':
format_type = ARGTYPE_ARRAY;
/* XXX "ls" means wchar_t string. */
elt_type = ARGTYPE_CHAR;
elt_type = lng == 0 ? ARGTYPE_CHAR : ARGTYPE_INT;
self->percent = 0;
lens = &string_lens;
break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment