Commit 3a3a10a3 authored by gerd's avatar gerd

Support for the ulex lexer: Pxp_reader now has two interfaces

for the lexer side, one for the classical Lexing.lexbuf, one for
ULB.unicode_lexbuf. Depending on which is opened, Pxp_reader fills
one of these buffer implementations.
	There is also support for ulex in lex.src, and there is a
linker module for ulex.
	ulex passes the "reader" and "canonxml" regression tests.
	Regression tests now use a toploop for the PXP core. Set the
LEXER variable to select one of the toploops.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@675 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent b443106f
......@@ -12,6 +12,7 @@ include Makefile.rules
all:
$(MAKE) -C tools all
for pkg in $(PKGLIST); do $(MAKE) -C src/$$pkg all || exit; done
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg clean || exit; done
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg generate || exit; done
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg all || exit; done
......@@ -20,6 +21,18 @@ opt:
for pkg in $(PKGLIST); do $(MAKE) -C src/$$pkg opt || exit; done
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg opt || exit; done
# The following are for development:
.PHONY: lexers
lexers:
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg generate || exit; done
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg all || exit; done
.PHONY: lexers-again
lexers-again:
for pkg in $(GENPKGLIST); do $(MAKE) -C gensrc/$$pkg clean || exit; done
$(MAKE) lexers
# The following PHONY rule is important for Cygwin:
.PHONY: install
install:
......
......@@ -9,7 +9,7 @@ OCAMLOPT = $(OCAMLFIND) ocamlopt $(OCAMLOPT_OPTIONS) -package "$(PACKAGES)"
OCAMLDEP = ocamldep $(OCAMLDEP_OPTIONS)
OCAMLFIND = ocamlfind
OCAMLYACC = ocamlyacc
OCAMLLEX = ocamllex
OCAMLLEX = ocamllex$(LEX_OPT)
WLEX = wlex
TOOLS_DIR = $(TOP_DIR)/tools
......
#! /bin/sh
# $Id: configure,v 1.21 2003/11/04 22:44:40 gerd Exp $
# $Id$
# defaults:
with_lex=1
with_wlex=1
with_wlex_compat=1
lexlist="utf8,iso88591,iso88592,iso88593,iso88594,iso88595,iso88596,iso88597,iso88598,iso88599,iso885910,iso885913,iso885914,iso885915"
version="1.1.94.2"
with_ulex=1
lexlist="utf8,iso88591,iso88592,iso88593,iso88594,iso88595,iso88596,iso88597,iso88598,iso88599,iso885910,iso885913,iso885914,iso885915,iso885916"
version="1.1.95pre1"
exec_suffix=""
help_lex="Enable/disable ocamllex-based lexical analyzer for the -lexlist encodings"
help_wlex="Enable/disable wlex-based lexical analyzer for UTF-8"
help_wlex_compat="Enable/disable wlex-style compatibility package for UTF-8 and ISO-8859-1"
help_ulex="Enable/disable ulex-based lexical analyzer for UTF-8"
options="lex wlex wlex_compat"
lexlist_options="utf8 usascii iso88591 iso88592 iso88593 iso88594 iso88595 iso88596 iso88597 iso88598 iso88599 iso885910 iso885913 iso885914 iso885915 koi8r windows1250 windows1251 windows1252 windows1253 windows1254 windows1255 windows1256 windows1257 windows1258 cp437 cp737 cp775 cp850 cp852 cp855 cp856 cp857 cp860 cp861 cp862 cp863 cp864 cp865 cp866 cp869 cp874 cp1006 macroman"
options="lex wlex wlex_compat ulex"
lexlist_options="utf8 usascii iso88591 iso88592 iso88593 iso88594 iso88595 iso88596 iso88597 iso88598 iso88599 iso885910 iso885913 iso885914 iso885915 iso885916 koi8r windows1250 windows1251 windows1252 windows1253 windows1254 windows1255 windows1256 windows1257 windows1258 cp437 cp737 cp775 cp850 cp852 cp855 cp856 cp857 cp860 cp861 cp862 cp863 cp864 cp865 cp866 cp869 cp874 cp1006 macroman"
print_options () {
for opt in $options; do
......@@ -127,6 +129,20 @@ else
exit 1
fi
######################################################################
# Check ocamllex.opt
# Probe for the ocamllex.opt executable. The result is kept in lex_opt,
# which is "" when the probe fails and ".opt" when it succeeds; the
# Makefile.conf section of this script writes it out as LEX_OPT.
printf "%s" "Checking for ocamllex.opt..."
# Capture stdout and stderr together so the match below sees the help text
# (or the shell's error message) regardless of which stream it is printed on.
r=`ocamllex.opt -help 2>&1`
lex_opt=""
# Any output containing the word "usage" is taken as proof that the
# executable exists and could be run.
case "$r" in
*usage*)
echo "found"
lex_opt=".opt" ;;
*)
echo "not found" ;;
esac
######################################################################
# Check netstring
......@@ -176,6 +192,21 @@ if [ $with_wlex -gt 0 ]; then
fi
fi
######################################################################
# ulex
# Only probe for ulex when ulex support has not already been switched off.
# The probe asks findlib whether a package named "ulex" is known; if the
# query fails, with_ulex is reset to 0 so later sections skip ulex entirely.
if [ $with_ulex -gt 0 ]; then
printf "%s" "Checking for ulex... "
# Only the exit status of the query matters; its output is discarded.
if ocamlfind query ulex >/dev/null 2>/dev/null; then
echo "found"
else
echo "not found"
echo "ulex support is disabled"
with_ulex=0
fi
fi
######################################################################
# Check Lexing.lexbuf type
......@@ -268,17 +299,27 @@ lexlist=`echo "$lexlist" | sed -e 's/,/ /g'`
# reqall: the predecessor list for package "pxp":
reqall="pxp-engine"
for lexname in $lexlist; do
if [ $with_wlex -gt 0 -a "$lexname" = "utf8" ] ; then
# Leave UTF-8 out
:
else
include=1
if [ "$lexname" = "utf8" ]; then
if [ $with_wlex -gt 0 -o $with_ulex -gt 0 ]; then
# Leave UTF-8 out
include=0
fi
fi
if [ $include -gt 0 ]; then
reqall="$reqall pxp-lex-$lexname"
fi
genpkglist="$genpkglist pxp-lex-$lexname"
done
if [ $with_wlex -gt 0 ]; then
if [ $with_wlex -gt 0 -a $with_ulex -eq 0 ]; then
reqall="$reqall pxp-wlex-utf8"
fi
if [ $with_ulex -gt 0 ]; then
reqall="$reqall pxp-ulex-utf8"
fi
if [ $with_wlex -gt 0 ]; then
genpkglist="$genpkglist pxp-wlex-utf8"
fi
......@@ -286,6 +327,10 @@ if [ $with_wlex_compat -gt 0 ]; then
genpkglist="$genpkglist pxp-wlex"
fi
if [ $with_ulex -gt 0 ]; then
genpkglist="$genpkglist pxp-ulex-utf8"
fi
for lexname in $lexlist_options; do
allgenpkglist="$allgenpkglist pxp-lex-$lexname"
done
......@@ -337,6 +382,17 @@ if [ $with_wlex_compat -gt 0 ]; then
gensrc/$pkg/META.in >gensrc/$pkg/META
fi
######################################################################
# Write META for ulex
# Generate gensrc/pxp-ulex-utf8/META from its META.in template by
# substituting the @VERSION@ and @ENCNAME@ placeholders. Skipped when ulex
# support is disabled.
if [ $with_ulex -gt 0 ]; then
pkg="pxp-ulex-utf8"
echo "Writing gensrc/$pkg/META"
# NOTE(review): $enc is not assigned anywhere in this section; it presumably
# still holds a value from an earlier part of the script. Confirm it is set
# (or that the template has no @ENCNAME@ placeholder), otherwise the
# placeholder is replaced by an unrelated or empty string.
sed -e "s/@VERSION@/$version/g" \
-e "s/@ENCNAME@/$enc/g" \
gensrc/$pkg/META.in >gensrc/$pkg/META
fi
######################################################################
# Write Makefile.conf
......@@ -348,6 +404,7 @@ GENPKGLIST = $genpkglist
ALLGENPKGLIST = $allgenpkglist
EXEC_SUFFIX = $exec_suffix
LEXBUF_307 = $lexbuf_307
LEX_OPT = $lex_opt
_EOF_
######################################################################
......
......@@ -28,8 +28,9 @@ gen_done: $(INPUT)/*.def $(INPUT)/*.src
if [ "$(ENC)" = "utf8" ]; then flags="-multiple"; else flags=""; fi; \
$(LEXPP) -charclasses "$(INPUT)/char_classes_generic.def" \
-lexsrc "$(INPUT)/lex.src" \
-linksrc "$(INPUT)/link_generic.src" \
-linksrc "$(INPUT)/link_ocamllex.src" \
-encoding "$(ENC)" \
-outformat ocamllex \
-outlexprefix "pxp_lex_$(ENC)" \
-outlinkprefix "pxp_lex_link_$(ENC)" $$flags
for mll in *.mll; do echo "Doing $$mll:"; $(OCAMLLEX) $$mll; done
......
description = "Polymorphic XML parser - Lexical analyzer for UTF-8 based on wlex"
requires = "pxp-engine,wlexing"
requires = "pxp-engine,ulex"
version = "@VERSION@"
archive(byte) = "pxp_wlex_utf8.cma pxp_wlex_link_utf8.cmo"
archive(native) = "pxp_wlex_utf8.cmxa pxp_wlex_link_utf8.cmx"
archive(byte) = "pxp_ulex_utf8.cma pxp_ulex_link_utf8.cmo"
archive(native) = "pxp_ulex_utf8.cmxa pxp_ulex_link_utf8.cmx"
......@@ -2,8 +2,8 @@ TOP_DIR = ../..
ENC = utf8
INPUT = $(TOP_DIR)/src/pxp-lex
ML = $(shell echo pxp_ulex_$(ENC)_*.ml)
CMO = $(MLL:.ml=.cmo)
CMX = $(MLL:.ml=.cmx)
CMO = $(ML:.ml=.cmo)
CMX = $(ML:.ml=.cmx)
OCAMLC_OPTIONS = -I $(TOP_DIR)/src/pxp-engine -syntax camlp4o
OCAMLOPT_OPTIONS = $(OCAMLC_OPTIONS) -syntax camlp4o
PACKAGES = netstring,ulex
......
include ../Makefile.conf
.PHONY: all
all:
all: toploops
top=`dirname $$PWD` && \
OCAMLPATH="$$top/gensrc:$$top/src" && \
OCAMLPATH="$$top/gensrc:$$top/src:$$OCAMLPATH" && \
export OCAMLPATH && \
$(MAKE) -C reader && \
$(MAKE) -C write && \
......@@ -11,9 +13,43 @@ all:
$(MAKE) -C dynamic && \
$(MAKE) -C lex
# Build one OCaml toploop per UTF-8 lexer package that was configured.
# GENPKGLIST comes from ../Makefile.conf; a toploop is only built when the
# corresponding lexer package name occurs in that list.
.PHONY: toploops
toploops: exit.cmo
	if echo $(GENPKGLIST) | grep pxp-lex-utf8 >/dev/null 2>/dev/null; \
	then $(MAKE) toploop.lex-utf8; fi
	if echo $(GENPKGLIST) | grep pxp-wlex-utf8 >/dev/null 2>/dev/null; \
	then $(MAKE) toploop.wlex-utf8; fi
	if echo $(GENPKGLIST) | grep pxp-ulex-utf8 >/dev/null 2>/dev/null; \
	then $(MAKE) toploop.ulex-utf8; fi

# The toploop.* rules below declare no prerequisites, so an existing toploop
# is never considered out of date. This target forces a rebuild by deleting
# them first. It is a command, not a file, hence the .PHONY declaration.
.PHONY: toploops-again
toploops-again:
	rm -f toploop.*
	$(MAKE) toploops

# Each toploop is linked against the packages of the source tree: gensrc and
# src of the parent directory are put in front of the inherited OCAMLPATH.
# The three steps are chained with && so that they share one shell.
toploop.lex-utf8:
	top=`dirname $$PWD` && \
	OCAMLPATH="$$top/gensrc:$$top/src:$$OCAMLPATH" && \
	export OCAMLPATH && \
	ocamlfind ocamlmktop -o toploop.lex-utf8 -linkpkg -package threads,pxp,pxp-lex-utf8,findlib,str -thread

toploop.wlex-utf8:
	top=`dirname $$PWD` && \
	OCAMLPATH="$$top/gensrc:$$top/src:$$OCAMLPATH" && \
	export OCAMLPATH && \
	ocamlfind ocamlmktop -o toploop.wlex-utf8 -linkpkg -package threads,pxp,pxp-wlex-utf8,str -thread

toploop.ulex-utf8:
	top=`dirname $$PWD` && \
	OCAMLPATH="$$top/gensrc:$$top/src:$$OCAMLPATH" && \
	export OCAMLPATH && \
	ocamlfind ocamlmktop -o toploop.ulex-utf8 -linkpkg -package threads,pxp,pxp-ulex-utf8,str -thread

# Declaring exit.ml as a prerequisite makes exit.cmo rebuild when the source
# changes.
exit.cmo: exit.ml
	ocamlc -c exit.ml
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.cmxa
rm -f *.cmi *.cmo *.cma *.cmx *.cmxa toploop.*
.PHONY: CLEAN
CLEAN: clean
......
......@@ -6,7 +6,12 @@
Do "make" to start the compilation of the tests.
- To run the tests:
./run
LEXER=<id> ./run
where <id> is one of:
- lex-utf8
- wlex-utf8
- ulex-utf8
- Program dumpfiles: Do "make dumpfiles" to create it.
It takes XML file names on the command line, and writes a Latex
......
......@@ -6,7 +6,8 @@
#----------------------------------------------------------------------
test_canonxml: test_canonxml.ml
ocamlfind ocamlc -g -custom -o test_canonxml -package pxp,str -linkpkg test_canonxml.ml
ocamlfind ocamlc -g -a -o test_canonxml.cma -package pxp,str test_canonxml.ml
../create-wrapper test_canonxml
#----------------------------------------------------------------------
.PHONY: all
......@@ -15,6 +16,7 @@ all:
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out.xml
rm -f test_canonxml
.PHONY: CLEAN
CLEAN: clean
......@@ -22,4 +24,4 @@ CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
rm -f test_canonxml
......@@ -7,17 +7,21 @@
all: compile marshal_simple marshal_namespace marshal_recode
compile: compile.ml
ocamlfind ocamlc -g -custom -o compile -package pxp,str -linkpkg compile.ml
compile: xmlcompile.ml
ocamlfind ocamlc -g -a -o compile.cma -package pxp,str xmlcompile.ml
../create-wrapper compile
marshal_simple: marshal_simple.ml
ocamlfind ocamlc -g -custom -o marshal_simple -package pxp,str -linkpkg marshal_simple.ml
ocamlfind ocamlc -g -a -o marshal_simple.cma -package pxp,str marshal_simple.ml
../create-wrapper marshal_simple
marshal_namespace: marshal_namespace.ml
ocamlfind ocamlc -g -custom -o marshal_namespace -package pxp,str -linkpkg marshal_namespace.ml
ocamlfind ocamlc -g -a -o marshal_namespace.cma -package pxp,str marshal_namespace.ml
../create-wrapper marshal_namespace
marshal_recode: marshal_recode.ml
ocamlfind ocamlc -g -custom -o marshal_recode -package pxp -linkpkg marshal_recode.ml
ocamlfind ocamlc -g -a -o marshal_recode.cma -package pxp marshal_recode.ml
../create-wrapper marshal_recode
#----------------------------------------------------------------------
......@@ -28,6 +32,7 @@ all:
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa sample sample.ml out1 out2
rm -f *.d1 *.d2 *.d3 *.out *.out1 *.out2
rm -f compile marshal_simple marshal_namespace marshal_recode
.PHONY: CLEAN
CLEAN: clean
......@@ -35,5 +40,4 @@ CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
rm -f compile marshal_simple marshal_namespace marshal_recode
......@@ -6,7 +6,8 @@ sample="$1"
echo "Testing $sample:"
./compile -in "$sample" -out "sample.ml" -print -super-root -pis -comments >"out1"
echo "- code written to sample.ml, formatted data to out1"
OCAMLPATH=../../src ocamlfind ocamlc -package pxp -linkpkg -custom sample.ml -o sample
OCAMLPATH=../../src:../../gensrc:$OCAMLPATH ocamlfind ocamlc -package pxp -a sample.ml -o sample.cma
../create-wrapper sample
echo "- sample.ml compiled to sample"
./sample >out2
echo "- re-read data written to out2"
......
#! /bin/sh
# create-wrapper NAME
#
# Writes an executable shell script called NAME into the current directory.
# The generated script runs ../toploop.$LEXER on NAME.cma followed by
# ../exit.ml and forwards its own command-line arguments, so one bytecode
# archive can be run under whichever toploop the LEXER variable selects.

name="$1"

# Without a name the redirection below would target an empty path.
if [ -z "$name" ]; then
    echo "usage: create-wrapper NAME" >&2
    exit 2
fi

# The here-document is unquoted: "$name" is expanded now, at generation
# time, while the backslash-escaped \$LEXER and \$@ are left for the
# generated script to expand when it runs. The list of available toploops
# goes to stderr like the rest of the error message, so it cannot end up in
# output that a test captures from stdout.
cat <<_EOF_ >"$name"
#! /bin/sh
if [ -z "\$LEXER" ]; then
    echo "Set the LEXER env variable to select the UTF-8 lexer." >&2
    echo "Possible values for LEXER:" >&2
    (cd ..; ls -1 toploop.* | sed -e 's/toploop\./- /') >&2
    exit 1
fi
exec ../toploop.\$LEXER "$name.cma" ../exit.ml "\$@"
_EOF_

chmod a+x "$name"
......@@ -8,13 +8,16 @@
all: create_element modify strip
create_element: create_element.ml
ocamlfind ocamlc -labels -g -custom -o create_element -package pxp -linkpkg create_element.ml
ocamlfind ocamlc -labels -g -a -o create_element.cma -package pxp create_element.ml
../create-wrapper create_element
modify: modify.ml
ocamlfind ocamlc -g -custom -o modify -package pxp -linkpkg modify.ml
ocamlfind ocamlc -g -a -o modify.cma -package pxp modify.ml
../create-wrapper modify
strip: strip.ml
ocamlfind ocamlc -g -custom -o strip -package pxp -linkpkg strip.ml
ocamlfind ocamlc -g -a -o strip.cma -package pxp strip.ml
../create-wrapper strip
#----------------------------------------------------------------------
.PHONY: all
......@@ -23,6 +26,7 @@ all:
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out.xml
rm -f create_element modify strip
.PHONY: CLEAN
CLEAN: clean
......@@ -30,4 +34,3 @@ CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
rm -f create_element modify strip
......@@ -4,8 +4,9 @@ all: test_lex_utf8
test_lex_utf8: test_lex_utf8.ml
ocamlfind ocamlc -o test_lex_utf8 -package pxp-lex-utf8 -linkpkg \
ocamlfind ocamlc -o test_lex_utf8.cma -a -package pxp-lex-utf8 \
test_lex_utf8.ml
../create-wrapper test_lex_utf8
clean:
rm -rf test_lex_utf8 *.cmo *.cmi
......
......@@ -5,7 +5,8 @@
#----------------------------------------------------------------------
test_negative: test_negative.ml
ocamlfind ocamlc -g -custom -o test_negative -package pxp,str -linkpkg test_negative.ml
ocamlfind ocamlc -g -a -o test_negative.cma -package pxp,str test_negative.ml
../create-wrapper test_negative
#----------------------------------------------------------------------
.PHONY: all
......@@ -14,6 +15,7 @@ all:
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa current.out
rm -f test_negative
.PHONY: CLEAN
CLEAN: clean
......@@ -21,6 +23,5 @@ CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
rm -f test_negative
......@@ -8,14 +8,19 @@
.PHONY: all
all: test_reader test_rewritten_reader
test_reader: test_reader.ml minilex.ml
ocamlfind ocamlc -o test_reader -package pxp,unix,threads \
-linkpkg -thread \
test_reader: test_reader.cma
../create-wrapper test_reader
test_rewritten_reader: test_rewritten_reader.cma
../create-wrapper test_rewritten_reader
test_reader.cma: test_reader.ml minilex.ml
ocamlfind ocamlc -a -o test_reader.cma -package pxp,unix,threads \
-thread \
-g minilex.ml test_reader.ml
test_rewritten_reader: test_rewritten_reader.ml minilex.ml
ocamlfind ocamlc -o test_rewritten_reader -package pxp,unix \
-linkpkg \
test_rewritten_reader.cma: test_rewritten_reader.ml minilex.ml
ocamlfind ocamlc -a -o test_rewritten_reader.cma -package pxp,unix \
-g minilex.ml test_rewritten_reader.ml
minilex.ml: minilex.mll
......@@ -26,6 +31,7 @@ minilex.ml: minilex.mll
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa minilex.ml
rm -f test_reader test_rewritten_reader
.PHONY: CLEAN
CLEAN: clean
......@@ -33,5 +39,5 @@ CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
rm -f test_reader test_rewritten_reader
This diff is collapsed.
......@@ -29,7 +29,7 @@ let t001 () =
r # init_rep_encoding `Enc_iso88591;
r # init_warner None (new drop_warnings);
let lsrc = r # open_in xid in
let lb = Lexing.from_function (fun s n -> lsrc s 0 n) in
let lb = Lazy.force lsrc.lsrc_lexbuf in
let aid = r # active_id in
assert(aid.rid_private = Some pid);
assert(aid.rid_public = None);
......@@ -327,7 +327,7 @@ let t021 () =
rid_private = Some pid_a;
rid_system_base = Some "http://host/x/y"
} in
let lex_a = Lexing.from_function (fun s n -> lex_a_src s 0 n) in
let lex_a = Lazy.force lex_a_src.lsrc_lexbuf in
assert(nextchar lex_a = Some 'a');
let res_b = res_a # clone in
......@@ -336,7 +336,7 @@ let t021 () =
rid_system_base =
(res_a # active_id).rid_system;
} in
let lex_b = Lexing.from_function (fun s n -> lex_b_src s 0 n) in
let lex_b = Lazy.force lex_b_src.lsrc_lexbuf in
assert(nextchar lex_b = Some 'b');
let res_c = res_b # clone in
......@@ -345,7 +345,7 @@ let t021 () =
rid_system_base =
(res_b # active_id).rid_system;
} in
let lex_c = Lexing.from_function (fun s n -> lex_c_src s 0 n) in
let lex_c = Lazy.force lex_c_src.lsrc_lexbuf in
assert(nextchar lex_c = Some 'c');
true
......@@ -360,7 +360,7 @@ let t022 () =
rid_system = Some "t_a.dat";
rid_system_base = None;
} in
let lex_a = Lexing.from_function (fun s n -> lex_a_src s 0 n) in
let lex_a = Lazy.force lex_a_src.lsrc_lexbuf in
assert(nextchar lex_a = Some 'a');
let res_b = res_a # clone in
......@@ -369,7 +369,7 @@ let t022 () =
rid_system_base =
(res_a # active_id).rid_system;
} in
let lex_b = Lexing.from_function (fun s n -> lex_b_src s 0 n) in
let lex_b = Lazy.force lex_b_src.lsrc_lexbuf in
assert(nextchar lex_b = Some 'b');
true
......@@ -390,7 +390,7 @@ let t023 () =
rid_public = Some "A";
rid_system_base = None;
} in
let lex_a = Lexing.from_function (fun s n -> lex_a_src s 0 n) in
let lex_a = Lazy.force lex_a_src.lsrc_lexbuf in
assert(nextchar lex_a = Some 'a');
let res_b = res_a # clone in
......@@ -399,7 +399,7 @@ let t023 () =
rid_system_base =
(res_a # active_id).rid_system;
} in
let lex_b = Lexing.from_function (fun s n -> lex_b_src s 0 n) in
let lex_b = Lazy.force lex_b_src.lsrc_lexbuf in
assert(nextchar lex_b = Some 'b');
true
......@@ -422,7 +422,7 @@ let t024 () =
Some "http://user@foo/x/y/t_a.dat";
rid_system_base = None;
} in
let lex_a = Lexing.from_function (fun s n -> lex_a_src s 0 n) in
let lex_a = Lazy.force lex_a_src.lsrc_lexbuf in
assert(nextchar lex_a = Some 'a');
let res_b = res_a # clone in
......@@ -431,7 +431,7 @@ let t024 () =
rid_system_base =
(res_a # active_id).rid_system;
} in
let lex_b = Lexing.from_function (fun s n -> lex_b_src s 0 n) in
let lex_b = Lazy.force lex_b_src.lsrc_lexbuf in
assert(nextchar lex_b = Some 'b');
true
......
......@@ -8,17 +8,24 @@
.PHONY: all
all: test_write test_namespace
test_write: test_write.ml
ocamlfind ocamlc -g -custom -o test_write -package pxp,str -linkpkg test_write.ml
test_write: test_write.cma
../create-wrapper test_write
test_namespace: test_namespace.ml
ocamlfind ocamlc -g -custom -o test_namespace -package pxp,str -linkpkg test_namespace.ml -predicates pxp_without_utf8
test_namespace: test_namespace.cma
../create-wrapper test_namespace
test_write.cma: test_write.ml
ocamlfind ocamlc -a -g -o test_write.cma -package pxp,str test_write.ml
test_namespace.cma: test_namespace.ml
ocamlfind ocamlc -a -g -o test_namespace.cma -package pxp,str test_namespace.ml
#----------------------------------------------------------------------
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa out1 out2 out3
rm -f test_write test_namespace
.PHONY: CLEAN
CLEAN: clean
......@@ -26,5 +33,4 @@ CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
rm -f test_write test_namespace
......@@ -121,7 +121,7 @@ let rec expand_attvalue_with_rec_check (lexobj : lexer_obj) l dtd entities norm_
expand_attvalue_with_rec_check
lexobj l dtd entities norm_crlf
| CharData _ ->
let ll = lexobj # lexeme_len in
let ll = lexobj # lexeme_strlen in
if ll > 1 && ll = l then
raise Quick_exit
else
......@@ -141,14 +141,19 @@ let expand_attvalue (lexobj : lexer_obj) dtd s norm_crlf =
* not (i.e. two characters).
* lexbuf: must result from a previous Lexing.from_string
*)
(* print_string ("expand_attvalue \"" ^ s ^ "\" = "); *)
try
lexobj # open_string_inplace s;
let l =
expand_attvalue_with_rec_check
lexobj (String.length s) dtd [] norm_crlf in
String.concat "" l
let s' =
String.concat "" l in
(* print_string ("\"" ^ s' ^ "\"\n"); *)
s'
with
Quick_exit ->
(* print_string ("\"" ^ s ^ "\"\n"); *)
s
;;
......@@ -343,6 +348,7 @@ let split_attribute_value (lfactory:lexer_factory) v =
(* splits 'v' into a list of names or nmtokens. The white space separating
* the names/nmtokens in 'v' is suppressed and not returned.
*)
(* print_string ("split_attribute_value \"" ^ v ^ "\" = "); *)
let lexobj = lfactory # open_string_inplace v in
let scan = lexobj # scan_name_string in
let rec get_name_list() =
......@@ -353,7 +359,12 @@ let split_attribute_value (lfactory:lexer_factory) v =
| Nametoken s -> s :: get_name_list()
| _ -> raise(Validation_error("Illegal attribute value"))
in
get_name_list()
let l = get_name_list() in
(* print_string "[";
print_string (String.concat "," l);
print_string "]\n";
*)
l
;;
......
......@@ -180,6 +180,7 @@ type rep_encoding =
| `Enc_iso885913
| `Enc_iso885914
| `Enc_iso885915
| `Enc_iso885916
| `Enc_koi8r
| `Enc_windows1250
| `Enc_windows1251
......
......@@ -197,6 +197,7 @@ module type CORE_TYPES = sig
| `Enc_iso885913
| `Enc_iso885914
| `Enc_iso885915
| `Enc_iso885916
| `Enc_koi8r
| `Enc_windows1250
| `Enc_windows1251
......
......@@ -223,7 +223,7 @@ let update_content_lines v tok =
count_lines v.linecount v.lexobj#lexeme;
update_lines v;
| _ ->
v.column <- v.column + v.lexobj#lexeme_len
v.column <- v.column + v.lexobj#lexeme_strlen
;;
let update_lines_within_tag v tok =
......@@ -241,7 +241,7 @@ let update_lines_within_tag v tok =
v.line <- v.line + 1;
v.column <- 0;
| _ ->
v.column <- v.column + v.lexobj#lexeme_len
v.column <- v.column + v.lexobj#lexeme_strlen
;;
let update_other_lines v tok =
......
......@@ -216,6 +216,9 @@ object
method scan_tag_eb : unit -> (token * lexers)
method scan_tag_eb_att : unit -> bool -> (token * lexers)
method lexeme_length : int
method lexeme_char : int -> int
method lexeme : string
method lexeme_len : int
method lexeme_strlen : int
method sub_lexeme : int -> int -> string
end
......@@ -193,11 +193,31 @@ object
method scan_tag_eb : unit -> (token * lexers)
method scan_tag_eb_att : unit -> bool -> (token * lexers)
method lexeme_length : int
(** The length of the lexeme in characters
*
* For some implementations, this function is very inefficient.
*)
method lexeme_char : int -> int
(** Returns one character of the lexeme as Unicode code point
*
* For some implementations, this function is very inefficient.
*)
method lexeme : string
(** The lexeme scanned last, encoded as [encoding] *)
method lexeme_len : int
method lexeme_strlen : int
(** = String.length lexeme, i.e. number of bytes of the lexeme,
* not the number of characters
*)
method sub_lexeme : int -> int -> string
(** A substring of the current lexeme. The arguments are the position
* and length of the substring in characters (not bytes). The string
* is encoded in [encoding].
*