Commit 18c69caf authored by Erick's avatar Erick

Corrected some problems on character encoding depending of the SHELL LC_* varisables

parent e70e2df6
This diff is collapsed.
This diff is collapsed.
/*
* p r i n t . c -- writing stuff
*
* Copyright © 1993-2011 Erick Gallesio - I3S-CNRS/ESSI <eg@unice.fr>
* Copyright © 1993-2012 Erick Gallesio - I3S-CNRS/ESSI <eg@unice.fr>
*
*
* This program is free software; you can redistribute it and/or modify
......@@ -21,7 +21,7 @@
*
* Author: Erick Gallesio [eg@unice.fr]
* Creation date: ??-Oct-1993 ??:??
* Last file update: 23-Jul-2011 12:12 (eg)
* Last file update: 26-Feb-2012 18:25 (eg)
*
*/
#include <ctype.h>
......@@ -88,11 +88,12 @@ static void Inline printkeyword(SCM key, SCM port, int mode)
}
static char printhexa(int x)
static Inline char printhexa(int x)
{
return (x >= 10) ? (x - 10 + 'a') : (x + '0');
}
static void printstring(SCM s, SCM port, int mode)
{
if (mode == DSP_MODE) {
......@@ -122,18 +123,27 @@ static void printstring(SCM s, SCM port, int mode)
case '\v' : *buff++ = '\\'; *buff++ = 'v'; break;
case '"' :
case '\\' : *buff++ = '\\'; *buff++ = *p; break;
default : if (STk_use_utf8)
*buff++ = *p;
else {
if ((((unsigned char) *p) & 0177) < (unsigned char) ' ') {
/* Non printable character (It works only for ISO 8859-x !!) */
*buff++ = '\\';
*buff++ = 'x';
*buff++ = printhexa((unsigned char) *p / 16);
*buff++ = printhexa((unsigned char) *p % 16);
default : {
int printable;
if (STk_use_utf8)
printable =
(((unsigned) *p) >= (unsigned) ' ');
else
printable =
((((unsigned char) *p) & 0177) >= (unsigned char) ' ');
if (printable)
*buff++ = *p;
else {
/* Non printable char. (It works only for char < 0xFF !!) */
*buff++ = '\\';
*buff++ = 'x';
*buff++ = printhexa((unsigned char) *p / 16);
*buff++ = printhexa((unsigned char) *p % 16);
*buff++ = ';';
}
}
else *buff++ = *p;
}
}
}
*buff++ = '"';
......
/*
* r e a d . c -- reading stuff
*
* Copyright © 1993-2011 Erick Gallesio - I3S-CNRS/ESSI <eg@unice.fr>
* Copyright © 1993-2012 Erick Gallesio - I3S-CNRS/ESSI <eg@unice.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -20,7 +20,7 @@
*
* Author: Erick Gallesio [eg@unice.fr]
* Creation date: ??-Oct-1993 ??:??
* Last file update: 8-Oct-2011 16:09 (eg)
* Last file update: 26-Feb-2012 18:51 (eg)
*
*/
......@@ -92,9 +92,12 @@ static void error_bad_dotted_list(SCM port)
signal_error(port, "bad dotted list", STk_nil);
}
static void error_bad_inline_hexa_sequence(SCM port)
static void error_bad_inline_hexa_sequence(SCM port, char *buffer, int x)
{
signal_error(port, "bad inline hexa sequence", STk_nil);
char message[200];
snprintf(message, 200, "bad inline hexa sequence (%s %d) on port ~S", buffer, x);
signal_error(port, message, port);
}
static void warning_parenthesis(SCM port)
......@@ -133,12 +136,12 @@ static int read_hex_sequence(SCM port, char* utf8_seq)
buffer[i] = '\0';
if (c != ';')
error_bad_inline_hexa_sequence(port);
error_bad_inline_hexa_sequence(port, buffer, 1);
else {
val = strtol(buffer, &end, 16);
if (val == LONG_MIN || val == LONG_MAX || *end != ';')
error_bad_inline_hexa_sequence(port);
error_bad_inline_hexa_sequence(port, buffer, 2);
else
if (STk_use_utf8) {
int len = STk_char2utf8(val, utf8_seq);
......@@ -153,7 +156,7 @@ static int read_hex_sequence(SCM port, char* utf8_seq)
}
/* if we are here , we have an error */
error_bad_inline_hexa_sequence(port);
error_bad_inline_hexa_sequence(port, buffer,3);
return 0;
}
......@@ -304,7 +307,7 @@ static SCM read_char(SCM port, int c)
for( ; ; ) {
tok[j++] = c;
c = STk_getc(port);
if (c == EOF || ((c <=0x80) && isspace((unsigned char)c)))
if (c == EOF || ((c <=0x80) && isspace((unsigned char)c)))
/* (c < 0x80) is for MacOs */
break;
if (strchr("()[]'`,;\"", c)) {
......
......@@ -21,7 +21,7 @@
*
* Author: Erick Gallesio [eg@unice.fr]
* Creation date: 28-Dec-1999 21:19 (eg)
* Last file update: 1-Jan-2012 19:33 (eg)
* Last file update: 26-Feb-2012 23:32 (eg)
*/
#include <stklos.h>
......@@ -181,15 +181,15 @@ int main(int argc, char *argv[])
argv += ret;
/* See if we use UTF8 encoding */
if (!setlocale(LC_CTYPE, "")) {
if (!setlocale(LC_ALL, "")) {
fprintf(stderr, "Can't set the specified locale! "
"Check LANG, LC_CTYPE, LC_ALL.\n");
return 1;
} else {
if (STk_use_utf8 == -1) {
/* user didn't force the encoding. Determine it from environment */
STk_use_utf8 = (strcmp(nl_langinfo(CODESET), "UTF-8") == 0);
}
}
if (STk_use_utf8 == -1) {
/* user didn't force the encoding. Determine it from environment */
STk_use_utf8 = (strcmp(nl_langinfo(CODESET), "UTF-8") == 0);
}
/* Hack: to give the illusion that there is no VM under the scene */
......
......@@ -16,7 +16,7 @@
*
* Author: Erick Gallesio [eg@kaolin.unice.fr]
* Creation date: 29-Mar-1994 10:57
* Last file update: 2-Jan-2012 13:23 (eg)
* Last file update: 26-Feb-2012 23:14 (eg)
*/
#include <unistd.h>
......@@ -26,6 +26,7 @@
#include <fcntl.h>
#include <dirent.h>
#include <time.h>
#include <locale.h>
#include "stklos.h"
#include "struct.h"
......@@ -1144,6 +1145,14 @@ DEFINE_PRIMITIVE("%big-endian?", big_endianp, subr0, (void))
}
DEFINE_PRIMITIVE("%get-locale", get_locale, subr0, (void))
{
char *str = setlocale(LC_ALL, NULL);
return str? STk_Cstring2string(str) : STk_false;
}
int STk_init_system(void)
{
SCM current_module = STk_STklos_module;
......@@ -1222,5 +1231,6 @@ int STk_init_system(void)
ADD_PRIMITIVE(pause);
ADD_PRIMITIVE(big_endianp);
ADD_PRIMITIVE(get_locale);
return TRUE;
}
......@@ -2,7 +2,7 @@
#
# Author: Erick Gallesio [eg@essi.fr]
# Creation date: 23-May-2005 14:52 (eg)
# Last file update: 23-Oct-2010 11:54 (eg)
# Last file update: 26-Feb-2012 19:04 (eg)
all:
@echo "Use \"make test\" for testing STklos."
......@@ -10,7 +10,7 @@ all:
check: test
test:
@../src/stklos -f do-test.stk
@../src/stklos --utf8-encoding=yes -f do-test.stk
clean:
rm -f TEST.LOG data *~
......@@ -19,5 +19,3 @@ distclean: clean
rm -f Makefile
install:
......@@ -19,7 +19,7 @@
#
# Author: Erick Gallesio [eg@essi.fr]
# Creation date: 23-May-2005 14:52 (eg)
# Last file update: 23-Oct-2010 11:54 (eg)
# Last file update: 26-Feb-2012 19:04 (eg)
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
......@@ -360,7 +360,7 @@ all:
check: test
test:
@../src/stklos -f do-test.stk
@../src/stklos --utf8-encoding=yes -f do-test.stk
clean:
rm -f TEST.LOG data *~
......
;;;; -*- coding utf-8 -*-
;;;; test-utf8.stk -- Test of UTF-8 strings
;;;;
;;;; Copyright © 2011 Erick Gallesio - Polytech'Nice-Sophia <eg@unice.fr>
;;;; Copyright © 2011-2012 Erick Gallesio - Polytech'Nice-Sophia <eg@unice.fr>
;;;;
;;;;
;;;; This program is free software; you can redistribute it and/or modify
......@@ -21,15 +21,22 @@
;;;;
;;;; Author: Erick Gallesio [eg@unice.fr]
;;;; Creation date: 27-May-2011 23:35 (eg)
;;;; Last file update: 8-Oct-2011 22:13 (eg)
;;;; Last file update: 26-Feb-2012 23:33 (eg)
;;;;
(require "test")
(define *lang-is-utf8?* (let ((lang (getenv "LANG")))
(and lang
(or (string-find? "UTF8" lang)
(string-find? "utf8" lang)))))
;;(define *lang-is-utf8?* (let ((lang (getenv "LANG")))
;; (and lang
;; (or (string-find? "UTF8" lang)
;; (string-find? "utf8" lang)))))
(define *lang-is-utf8?* #t) ;; In fact, we force it, since it must work even if
;; user doesn't use UTF-8 when launch the test.
;; STklos is now called with with -u=1 option
(define *is-C?* (equal? (%get-locale) "C"))
(test-section "Unicode Characters")
;;------------------------------------------------------------------
......@@ -95,11 +102,11 @@
(test "gambit.8" #\xDF (char-foldcase #\xDF))
(test "gambit.9" #\x3A3 (char-upcase #\x3A3))
(when *lang-is-utf8?* (test "gambit.10" #\x3C3 (char-downcase #\x3A3)))
(unless *is-C?* (test "gambit.10" #\x3C3 (char-downcase #\x3A3)))
#;(test "gambit.11" #\x3A3 (char-titlecase #\x3A3)) ;; not R7
(when *lang-is-utf8?* (test "gambit.12" #\x3C3 (char-foldcase #\x3A3)))
(when *lang-is-utf8?* (test "gambit.13" #\x3A3 (char-upcase #\x3C2)))
(unless *is-C?*
(test "gambit.12" #\x3C3 (char-foldcase #\x3A3))
(test "gambit.13" #\x3A3 (char-upcase #\x3C2)))
(test "gambit.14" #\x3C2 (char-downcase #\x3C2))
#;(test "gambit.15" #\x3A3 (char-titlecase #\x3C2)) ;; not R7
(test "gambit.16" #\x3C3 (char-foldcase #\x3C2))
......@@ -133,10 +140,13 @@
(test "gambit.42" #f (char-whitespace? #\a))
(test "gambit.43" #f (char-upper-case? #\a))
(test "gambit.44" #t (char-upper-case? #\A))
(when *lang-is-utf8?* (test "gambit.45" #t (char-upper-case? #\x3A3)))
(unless *is-C?*
(test "gambit.45" #t (char-upper-case? #\x3A3)))
(test "gambit.46" #t (char-lower-case? #\a))
(test "gambit.47" #f (char-lower-case? #\A))
(when *lang-is-utf8?* (test "gambit.48" #t (char-lower-case? #\x3C3)))
(unless *is-C?*
(test "gambit.48" #t (char-lower-case? #\x3C3)))
#;(test "gambit.49" #t (char-lower-case? #\x00AA)) ;; not clear
#;(test "gambit.50" #f (char-title-case? #\a)) ;; Not R7
#;(test "gambit.51" #f (char-title-case? #\A)) ;; Not R7
......@@ -160,7 +170,8 @@
#;(test "gambit.66" "strasse" (string-foldcase "Stra\xDF;e")) ;; not R7
(test "gambit.67" "strasse" (string-downcase "STRASSE"))
(when *lang-is-utf8?* (test "gambit.68" "\x3C3;" (string-downcase "\x3A3;")))
(unless *is-C?*
(test "gambit.68" "\x3C3;" (string-downcase "\x3A3;")))
(test "gambit.69" "\x39E;\x391;\x39F;\x3A3;"
(string-upcase "\x39E;\x391;\x39F;\x3A3;"))
......@@ -170,7 +181,7 @@
(string-downcase "\x39E;\x391;\x39F;\x3A3;\x3A3;"))
#;(test "gambit.72" "\x3BE;\x3B1;\x3BF;\x3C2; \x3C3;"
(string-downcase "\x39E;\x391;\x39F;\x3A3; \x3A3;")) ;; not R7
(when *lang-is-utf8?*
(unless *is-C?*
(test "gambit.73" "\x3BE;\x3B1;\x3BF;\x3C3;"
(string-foldcase "\x39E;\x391;\x39F;\x3A3;"))
(test "gambit.74" "\x39E;\x391;\x39F;\x3A3;"
......@@ -206,7 +217,7 @@
#;(test "gambit.96" #t (string-ci=? "Stra\xDF;e" "STRASSE")) ;; Not R7
#;(test "gambit.97" #t (string-ci=? "\x39E;\x391;\x39F;\x3A3;" ;; Not R7
"\x3BE;\x3B1;\x3BF;\x3C2;"))
(when *lang-is-utf8?*
(unless *is-C?*
(test "gambit.98" #t (string-ci=? "\x39E;\x391;\x39F;\x3A3;"
"\x3BE;\x3B1;\x3BF;\x3C3;")))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment