Commit 5eb82398 authored by Erick's avatar Erick

Added the utf8.c file

parent 161390dc
......@@ -2,7 +2,7 @@
#
# Author: Erick Gallesio [eg@unice.fr]
# Creation date: 11-Apr-2000 10:30 (eg)
# Last file update: 18-Apr-2011 23:49 (eg)
# Last file update: 30-Apr-2011 23:54 (eg)
CC = @CC@
CFLAGS = @CFLAGS@ @STKCFLAGS@
......@@ -33,7 +33,7 @@ stklos_SOURCES = base64.c blob.c boolean.c boot.c box.c char.c cond.c \
list.c misc.c md5.c number.c object.c parameter.c \
path.c port.c print.c proc.c process.c promise.c read.c regexp.c \
signal.c sio.c socket.c sport.c stklos.c str.c struct.c \
symbol.c system.c uvector.c vector.c vm.c vport.c $(THREAD_FILES)
symbol.c system.c utf8.c uvector.c vector.c vm.c vport.c $(THREAD_FILES)
### # The STklos library
......
......@@ -19,7 +19,7 @@
#
# Author: Erick Gallesio [eg@unice.fr]
# Creation date: 11-Apr-2000 10:30 (eg)
# Last file update: 18-Apr-2011 23:49 (eg)
# Last file update: 30-Apr-2011 23:54 (eg)
VPATH = @srcdir@
......@@ -59,9 +59,9 @@ am__stklos_SOURCES_DIST = base64.c blob.c boolean.c boot.c box.c \
keyword.c lib.c list.c misc.c md5.c number.c object.c \
parameter.c path.c port.c print.c proc.c process.c promise.c \
read.c regexp.c signal.c sio.c socket.c sport.c stklos.c str.c \
struct.c symbol.c system.c uvector.c vector.c vm.c vport.c \
thread-none.c mutex-none.c thread-common.c thread-pthreads.c \
mutex-common.c mutex-pthreads.c
struct.c symbol.c system.c utf8.c uvector.c vector.c vm.c \
vport.c thread-none.c mutex-none.c thread-common.c \
thread-pthreads.c mutex-common.c mutex-pthreads.c
@NO_THREAD_FALSE@@PTHREADS_TRUE@am__objects_1 = \
@NO_THREAD_FALSE@@PTHREADS_TRUE@ thread-common.$(OBJEXT) \
@NO_THREAD_FALSE@@PTHREADS_TRUE@ thread-pthreads.$(OBJEXT) \
......@@ -82,8 +82,8 @@ am_stklos_OBJECTS = base64.$(OBJEXT) blob.$(OBJEXT) boolean.$(OBJEXT) \
regexp.$(OBJEXT) signal.$(OBJEXT) sio.$(OBJEXT) \
socket.$(OBJEXT) sport.$(OBJEXT) stklos.$(OBJEXT) \
str.$(OBJEXT) struct.$(OBJEXT) symbol.$(OBJEXT) \
system.$(OBJEXT) uvector.$(OBJEXT) vector.$(OBJEXT) \
vm.$(OBJEXT) vport.$(OBJEXT) $(am__objects_1)
system.$(OBJEXT) utf8.$(OBJEXT) uvector.$(OBJEXT) \
vector.$(OBJEXT) vm.$(OBJEXT) vport.$(OBJEXT) $(am__objects_1)
stklos_OBJECTS = $(am_stklos_OBJECTS)
am__DEPENDENCIES_1 =
stklos_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
......@@ -271,7 +271,7 @@ stklos_SOURCES = base64.c blob.c boolean.c boot.c box.c char.c cond.c \
list.c misc.c md5.c number.c object.c parameter.c \
path.c port.c print.c proc.c process.c promise.c read.c regexp.c \
signal.c sio.c socket.c sport.c stklos.c str.c struct.c \
symbol.c system.c uvector.c vector.c vm.c vport.c $(THREAD_FILES)
symbol.c system.c utf8.c uvector.c vector.c vm.c vport.c $(THREAD_FILES)
### # The STklos library
......@@ -462,6 +462,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thread-common.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thread-none.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thread-pthreads.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf8.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uvector.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vector.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/vm.Po@am__quote@
......
......@@ -23,7 +23,7 @@
*
* Author: Erick Gallesio [eg@kaolin.unice.fr]
* Creation date: ??????
* Last file update: 24-Apr-2011 14:58 (eg)
* Last file update: 1-May-2011 12:35 (eg)
*/
#include <ctype.h>
......@@ -31,8 +31,6 @@
#include <wctype.h>
int STk_use_utf8 = 1;
struct charelem {
char *name;
unsigned char value;
......@@ -132,13 +130,24 @@ static int charcompi(SCM c1, SCM c2)
}
/*
 * Debugging aid: print `str` and its length in bytes on standard output,
 * followed by every byte of the string in decimal and in hexadecimal.
 */
static void dump_utf8_str(char *str)
{
  const unsigned char *p;

  /* strlen() returns a size_t, which must be printed with %zu (not %d) */
  printf("Dump of '%s' (len = %zu)\n", str, strlen(str));

  /* Go through unsigned bytes so that values >= 0x80 are not sign-extended */
  for (p = (const unsigned char *) str; *p; p++) {
    printf("%03u %02x ", (unsigned) *p, (unsigned) *p);
  }
  printf("---\n");
}
int STk_string2char(char *s)
/* converts a char name to a char */
{
register struct charelem *p;
int val = STk_utf82char((uint8_t *) s);
int val;
if (* (STk_utf8_grab_char(s, &val)) == '\0') return val;
if (val >= 0) return val;
for (p=chartable; *(p->name); p++) {
if (my_strcmpi(p->name, s) == 0) return (int) (p->value);
}
......@@ -158,59 +167,6 @@ char *STk_char2string(char c) /* convert a char to it's */
return NULL;
}
/*
 * Convert a NUL-terminated buffer which holds exactly one UTF-8 encoded
 * character into the corresponding code point.
 *
 * Returns the code point, or -1 when `buff` is empty, does not start with a
 * valid lead byte, has a missing or malformed continuation byte, or contains
 * anything after the first character.
 */
int STk_utf82char(uint8_t *buff)
{
  int len, i, ch;

  if (buff[0] == '\0')                  /* empty string: do not look further */
    return -1;

  if (buff[0] < 0x80)                   /* ASCII: must be alone in the buffer */
    return (buff[1] == '\0') ? buff[0] : -1;

  if ((buff[0] < 0xc0) || (buff[0] > 0xf7))
    return -1;                          /* continuation byte or invalid lead */

  /* The lead byte gives the sequence length and the high-order payload bits */
  if (buff[0] < 0xe0) {
    len = 2; ch = buff[0] & 0x1f;
  } else if (buff[0] < 0xf0) {
    len = 3; ch = buff[0] & 0x0f;
  } else {
    len = 4; ch = buff[0] & 0x07;
  }

  /* Each continuation byte (10xxxxxx) brings 6 more bits. A NUL byte fails
   * the test too, so we never read beyond the end of the string. */
  for (i = 1; i < len; i++) {
    if ((buff[i] & 0xc0) != 0x80)
      return -1;
    ch = (ch << 6) | (buff[i] & 0x3f);
  }

  /* Nothing may follow the character */
  return (buff[len] == '\0') ? ch : -1;
}
/*
 * Store in `buff` the UTF-8 encoding of the code point `ch`, followed by a
 * terminating NUL byte, and return `buff` (seen as a C string).
 * When `ch` is 0x110000 or more, nothing is encoded and `buff` is set to
 * the empty string. `buff` must have room for 5 bytes.
 */
char *STk_char2utf8(int ch, uint8_t *buff)
{
  int len = 0;

  if (ch < 0x80) {
    /* 1 byte: 0xxxxxxx */
    buff[len++] = ch;
  } else if (ch < 0x800) {
    /* 2 bytes: 110xxxxx 10xxxxxx */
    buff[len++] = 0xc0 | (ch >> 6);
    buff[len++] = 0x80 | (ch & 0x3f);
  } else if (ch < 0x10000) {
    /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    buff[len++] = 0xe0 | (ch >> 12);
    buff[len++] = 0x80 | ((ch >> 6) & 0x3f);
    buff[len++] = 0x80 | (ch & 0x3f);
  } else if (ch < 0x110000) {
    /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    buff[len++] = 0xf0 | (ch >> 18);
    buff[len++] = 0x80 | ((ch >> 12) & 0x3f);
    buff[len++] = 0x80 | ((ch >> 6) & 0x3f);
    buff[len++] = 0x80 | (ch & 0x3f);
  }

  buff[len] = '\0';
  return (char *) buff;
}
/*===========================================================================*\
*
......@@ -290,13 +246,13 @@ CHAR_COMPARE("char-ci>=?", chargei, (charcompi(c1,c2) >= 0))
/*=============================================================================*/
#define TEST_CTYPE(tst, name) \
DEFINE_PRIMITIVE(name, CPP_CONCAT(char_is, tst), subr1, (SCM c)) \
{ \
if (!CHARACTERP(c)) error_bad_char(c); \
return STk_use_utf8 ? \
MAKE_BOOLEAN(CPP_CONCAT(isw, tst)(CHARACTER_VAL((unsigned char)c))): \
MAKE_BOOLEAN(CPP_CONCAT(is, tst)(CHARACTER_VAL(c))); \
#define TEST_CTYPE(tst, name) \
DEFINE_PRIMITIVE(name, CPP_CONCAT(char_is, tst), subr1, (SCM c)) \
{ \
if (!CHARACTERP(c)) error_bad_char(c); \
return STk_use_utf8 ? \
MAKE_BOOLEAN(CPP_CONCAT(isw, tst)(CHARACTER_VAL(c))): \
MAKE_BOOLEAN(CPP_CONCAT(is, tst)((unsigned char) CHARACTER_VAL(c))); \
}
/*
......@@ -423,5 +379,6 @@ int STk_init_char(void)
ADD_PRIMITIVE(char_upcase);
ADD_PRIMITIVE(char_downcase);
return TRUE;
}
......@@ -22,7 +22,7 @@
*
* Author: Erick Gallesio [eg@kaolin.unice.fr]
* Creation date: ????
* Last file update: 24-Apr-2011 11:31 (eg)
* Last file update: 1-May-2011 22:30 (eg)
*
*
* Completely rewritten for the STklos version (Jan. 2000)
......@@ -60,31 +60,11 @@ STk_getc(SCM port)
int
STk_get_character(SCM port) /* result may be a wide character */
{
if (PORT_UNGETC(port) != EOF)
return STk_getc(port);
else {
int c = STk_getc(port);
if (STk_use_utf8 && (c >= 0x80)) {
/* Read an UTF-8 character */
if ((c < 0xc0) || (c > 0xf7))
return UTF8_INCORRECT_SEQUENCE;
else if (c < 0xe0)
c = ((c & 0x3f) << 6) +
((STk_getc(port) & 0x3F));
else if (c < 0xf0) {
c = ((c & 0x1f) << 12) +
((STk_getc(port) & 0x3f) << 6) +
((STk_getc(port) & 0x3f));
} else {
c = ((c & 0x0F) << 16) +
((STk_getc(port) &0x3f) << 6) +
((STk_getc(port) &0x3f) << 6) +
((STk_getc(port) &0x3F));
}
}
return c;
}
return (PORT_UNGETC(port) != EOF) ?
/* we have an ungetted char, call normal getc */
STk_getc(port):
/* try to read it as an UTF-8 sequence */
STk_utf8_read_char(port);
}
......
......@@ -21,7 +21,7 @@
*
* Author: Erick Gallesio [eg@unice.fr]
* Creation date: 28-Dec-1999 22:58 (eg)
* Last file update: 24-Apr-2011 11:30 (eg)
* Last file update: 1-May-2011 22:29 (eg)
*/
......@@ -371,11 +371,6 @@ char *STk_char2string(char c);
int STk_string2char(char *s);
int STk_init_char(void);
char *STk_char2utf8(int ch, uint8_t *buff);
int STk_utf82char(uint8_t *buff);
extern int STk_use_utf8;
/*
------------------------------------------------------------------------------
......@@ -1246,6 +1241,25 @@ EXTERN_PRIMITIVE("current-thread", current_thread, subr0, (void));
int STk_init_threads(int stack_size, void *start_stack);
int STk_init_mutexes(void);
/*
------------------------------------------------------------------------------
----
---- U T F 8 . C
----
------------------------------------------------------------------------------
*/
/* Flag defined (with value 1) in utf8.c. STk_utf8_read_char decodes
 * multi-byte sequences only when it is non-zero. */
extern int STk_use_utf8;
/* Decode the UTF-8 sequence starting at `str` and store the code point in
 * `*c`. Returns NULL when the first byte cannot start a sequence. */
char *STk_utf8_grab_char(char *str, int *c); /* result = pos. after current one */
/* Write the UTF-8 encoding of `ch`, plus a terminating NUL, into `str`
 * (at most 4 bytes + NUL). Signals an error for ch >= 0x110000. */
int STk_char2utf8(int ch, char *str); /* result = length of the UTF-8 repr. */
/* Number of bytes (1 to 4) of the UTF-8 encoding of `ch`,
 * or -1 for ch >= 0x110000. */
int STk_utf8_char_length(int ch);
/* Read one character from `port` with STk_getc, decoding a multi-byte UTF-8
 * sequence when STk_use_utf8 is set. Returns UTF8_INCORRECT_SEQUENCE when
 * the first byte read cannot start a sequence. */
int STk_utf8_read_char(SCM port);
/* Initialization of utf8.c (registers its debug primitive when STK_DEBUG
 * is defined). Returns TRUE. */
int STk_init_utf8(void);
/*
------------------------------------------------------------------------------
----
......
/*
* utf8.c -- UTF-8 support functions
*
* Copyright 2011 Erick Gallesio - Polytech'Nice-Sophia <eg@unice.fr>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*
* Author: Erick Gallesio [eg@unice.fr]
* Creation date: 30-Apr-2011 19:46 (eg)
* Last file update: 1-May-2011 22:29 (eg)
*/
#include "stklos.h"
int STk_use_utf8 = 1;
/*
 * Decode the UTF-8 sequence which starts at `str`.
 *
 * On success, the code point is stored in `*c` and the result is the
 * position just after the decoded sequence.
 * On failure (first byte is not a valid lead byte, or a continuation byte
 * is missing or malformed), the result is NULL and `*c` is left untouched.
 */
char *STk_utf8_grab_char(char *str, int *c) /* result = pos. after current one */
{
  const uint8_t *buff = (const uint8_t *) str;
  int len, i, ch;

  if ((buff[0] & 0x80) == 0) {          /* ASCII: one byte */
    *c = buff[0];
    return str + 1;
  }

  if ((buff[0] < 0xc0) || (buff[0] > 0xf7))
    return NULL;                        /* continuation byte or invalid lead */

  /* The lead byte gives the sequence length and the high-order payload bits */
  if (buff[0] < 0xe0) {
    len = 2; ch = buff[0] & 0x1f;
  } else if (buff[0] < 0xf0) {
    len = 3; ch = buff[0] & 0x0f;
  } else {
    len = 4; ch = buff[0] & 0x07;
  }

  /* Each continuation byte (10xxxxxx) brings 6 more bits. The terminating
   * NUL fails the test, so a truncated sequence is rejected without reading
   * beyond the end of the string. */
  for (i = 1; i < len; i++) {
    if ((buff[i] & 0xc0) != 0x80)
      return NULL;
    ch = (ch << 6) | (buff[i] & 0x3f);
  }

  *c = ch;
  return str + len;
}
/*
 * Read one character from `port`.
 *
 * The first byte is obtained with STk_getc. If UTF-8 support is off
 * (STk_use_utf8 is 0) or if this value is below 0x80, it is returned as is.
 * Otherwise it is taken as the lead byte of a 2 to 4 byte UTF-8 sequence
 * whose continuation bytes are read from the port, and the decoded code
 * point is returned.
 *
 * Returns UTF8_INCORRECT_SEQUENCE when the lead byte is invalid or when a
 * continuation byte is missing or malformed (the offending byte has already
 * been consumed in that case).
 */
int STk_utf8_read_char(SCM port)
{
  int c = STk_getc(port);
  int extra, b;

  if (!STk_use_utf8 || (c < 0x80))
    return c;

  /* Read an UTF-8 character */
  if ((c < 0xc0) || (c > 0xf7))
    return UTF8_INCORRECT_SEQUENCE;

  /* The lead byte gives the number of continuation bytes and the
   * high-order payload bits */
  if (c < 0xe0) {
    extra = 1; c &= 0x1f;
  } else if (c < 0xf0) {
    extra = 2; c &= 0x0f;
  } else {
    extra = 3; c &= 0x07;
  }

  while (extra-- > 0) {
    b = STk_getc(port);
    /* Must be 10xxxxxx; this also rejects a negative result of STk_getc */
    if ((b < 0x80) || (b > 0xbf))
      return UTF8_INCORRECT_SEQUENCE;
    c = (c << 6) | (b & 0x3f);
  }
  return c;
}
/*
 * Store in `str` the UTF-8 encoding of the code point `ch`, followed by a
 * terminating NUL byte. `str` must have room for 5 bytes.
 *
 * Returns the number of bytes of the encoding (the NUL is not counted).
 * STk_error is called when `ch` is 0x110000 or more; should it return,
 * `str` is the empty string and the result is 0.
 *
 * NOTE(review): a negative `ch` takes the one-byte branch and is stored
 * truncated to 8 bits, as before -- confirm whether callers rely on this.
 */
int STk_char2utf8(int ch, char *str) /* result = length of the UTF-8 repr. */
{
  uint8_t *buff = (uint8_t *) str;      /* work on unsigned bytes */
  int n = 0;

  if (ch < 0x80) {
    /* 1 byte: 0xxxxxxx */
    buff[n++] = ch;
  } else if (ch < 0x800) {
    /* 2 bytes: 110xxxxx 10xxxxxx */
    buff[n++] = (ch >> 6) | 0xc0;
    buff[n++] = (ch & 0x3f) | 0x80;
  } else if (ch < 0x10000) {
    /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    buff[n++] = (ch >> 12) | 0xe0;
    buff[n++] = ((ch >> 6) & 0x3f) | 0x80;
    buff[n++] = (ch & 0x3f) | 0x80;
  } else if (ch < 0x110000) {
    /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    buff[n++] = (ch >> 18) | 0xF0;
    buff[n++] = ((ch >> 12) & 0x3F) | 0x80;
    buff[n++] = ((ch >> 6) & 0x3F) | 0x80;
    buff[n++] = (ch & 0x3F) | 0x80;
  } else {
    STk_error("bad UTF-8 character %d", ch);
  }

  buff[n] = '\0';
  return n;
}
/*
 * Return the number of bytes (1 to 4) of the UTF-8 encoding of the code
 * point `ch`, or -1 when `ch` is 0x110000 or more.
 */
int STk_utf8_char_length(int ch)
{
  /* limit[i] is the first code point which needs more than i+1 bytes */
  static const int limit[] = { 0x80, 0x800, 0x10000, 0x110000 };
  int len;

  for (len = 1; len <= 4; len++) {
    if (ch < limit[len - 1]) {
      return len;
    }
  }
  return -1;
}
/* ======================================================================
* STklos Primitives
* ====================================================================== */
#ifdef STK_DEBUG
/*
 * (%char-utf8-encoding c)
 *
 * Debug primitive: returns the list of the bytes (as integers) which form
 * the UTF-8 encoding of the character `c`. Signals an error if `c` is not
 * a character.
 */
DEFINE_PRIMITIVE("%char-utf8-encoding", char_utf8_encoding, subr1, (SCM c))
{
  SCM lst = STk_nil;
  char buffer[5];                       /* 4 bytes max. + terminating NUL */
  int i;

  if (!CHARACTERP(c)) STk_error("bad char ~S", c);

  /* Use the length returned by the encoder rather than strlen(): the
   * encoding of the null character is a single 0 byte, which strlen()
   * would see as an empty string. */
  i = STk_char2utf8(CHARACTER_VAL(c), buffer);

  /* Build the list from the last byte to the first one */
  while (--i >= 0)
    lst = STk_cons(MAKE_INT((uint8_t) buffer[i]), lst);
  return lst;
}
#endif
/* ======================================================================
* Initialization
* ====================================================================== */
/*
 * Initialization of the UTF-8 module: registers the %char-utf8-encoding
 * primitive when STklos is compiled with STK_DEBUG. Always returns TRUE.
 */
int STk_init_utf8(void)
{
#ifdef STK_DEBUG
/* This primitive is itself defined only under STK_DEBUG */
ADD_PRIMITIVE(char_utf8_encoding);
#endif
return TRUE;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment