Commit a5283158 authored by gerd's avatar gerd

Initial revision.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@677 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent ba40e891
# make validate: make bytecode executable
# make validate.opt: make native executable
# make clean: remove intermediate files (in this directory)
# make CLEAN: remove intermediate files (recursively)
# make distclean: remove any superflous files (recursively)
#----------------------------------------------------------------------
# Set LEXER to: lex-utf8, wlex-utf8, ulex-utf8
REQ=str pxp-engine pxp-lex-iso88591 pxp-$(LEXER)
all: pxpvalidate.byte pxpvalidate.opt pxpevparse.byte pxpevparse.opt
pxpvalidate.opt: validate.ml
ocamlfind ocamlopt -o pxpvalidate.opt -package "$(REQ)" -linkpkg validate.ml
pxpvalidate.byte: validate.ml
ocamlfind ocamlc -o pxpvalidate.byte -package "$(REQ)" -linkpkg validate.ml
pxpevparse.opt: evparse.ml
ocamlfind ocamlopt -o pxpevparse.opt -package "$(REQ)" -linkpkg evparse.ml
pxpevparse.byte: evparse.ml
ocamlfind ocamlc -o pxpevparse.byte -package "$(REQ)" -linkpkg evparse.ml
#----------------------------------------------------------------------
.PHONY: all
all:
.PHONY: clean
clean:
rm -f *.cmi *.cmo *.cma *.cmx *.o *.a *.cmxa
rm -f pxpvalidate.opt pxpvalidate.byte
rm -f pxpevparse.opt pxpevparse.byte
.PHONY: CLEAN
CLEAN: clean
.PHONY: distclean
distclean: clean
rm -f *~
----------------------------------------------------------------------
pxpvalidate macbeth.xml
----------------------------------------------------------------------
- OCaml 3.07pl2
PXP revision 676
- lex-utf8:
Byte: 1.1 s (user)
Opt: 0.39 s (user)
- wlex-utf8:
Byte: 1.1 s (user)
Opt: 0.40 s (user)
- ulex-utf8:
Byte: 2.31 s (user)
Opt: 0.52 s (user)
----------------------------------------------------------------------
pxpevparse macbeth.xml
----------------------------------------------------------------------
- OCaml 3.07pl2
PXP revision 676
- lex-utf8:
Byte: 0.56 s (user)
Opt: 0.14 s (user)
- wlex-utf8:
Byte: 0.57 s (user)
Opt: 0.16 s (user)
- ulex-utf8:
Byte: 1.8 s (user)
Opt: 0.3 s (user)
(* $Id$
* ----------------------------------------------------------------------
*
*)
open Pxp_document;;
open Pxp_yacc;;
open Pxp_types;;
open Pxp_ev_parser;;
let error_happened = ref false;;
let print_error e =
print_endline (string_of_exn e)
;;
class warner =
object
method warn w =
print_endline ("WARNING: " ^ w)
end
;;
let resolve_by_helper scheme program sends_mime_header =
let url_syntax =
{ Neturl.ip_url_syntax with
Neturl.url_accepts_8bits = true
}
in
let get_url id =
(* Only accept SYSTEM Ids with the right scheme: *)
match id with
System sysid ->
( try
let sysid_scheme =
try Neturl.extract_url_scheme sysid
with Neturl.Malformed_URL -> scheme
(* If no scheme found: assume our own scheme *)
in
if sysid_scheme = scheme then
Neturl.url_of_string url_syntax sysid (* or Malformed_URL *)
else
raise Pxp_reader.Not_competent
with
(* If the URL is syntactically wrong, do not accept it: *)
Neturl.Malformed_URL -> raise Pxp_reader.Not_competent
)
| _ ->
raise Pxp_reader.Not_competent
in
let read_mime_header ch =
let empty_re = Str.regexp "^[ \t\r\n]*$" in
let is_empty s = Str.string_match empty_re s 0 in
let buffer = Buffer.create 1024 in
let line = ref(input_line ch) in
if String.length !line >= 6 && String.sub !line 0 5 = "HTTP/" then
line := input_line ch;
while not (is_empty !line) do
Buffer.add_string buffer !line;
Buffer.add_string buffer "\n";
line := input_line ch
done;
Buffer.add_string buffer "\n";
Buffer.contents buffer
in
let open_channel id url =
let url_string = Neturl.string_of_url url in
let command = program ^ " " ^ Filename.quote url_string in
let ch = Unix.open_process_in command in
if sends_mime_header then
let header_string = read_mime_header ch in
let header_alist,_ = Mimestring.scan_header
header_string 0 (String.length header_string) in
let content_type =
try List.assoc "content-type" header_alist
with Not_found -> "application/octet-stream" in
let mime_type, mime_type_params =
Mimestring.scan_mime_type content_type [] in
let encoding =
try Some(Netconversion.encoding_of_string
(List.assoc "charset" mime_type_params))
with Not_found -> None in
ch, encoding
else
ch, None
in
let close_channel ch =
match Unix.close_process_in ch with
Unix.WEXITED 0 ->
()
| Unix.WEXITED n ->
failwith("Command terminated with exit code " ^ string_of_int n)
| Unix.WSIGNALED n ->
failwith("Command terminated by signal " ^ string_of_int n)
| _ -> assert false
in
new Pxp_reader.resolve_read_url_channel
~close:close_channel
~url_of_id: get_url
~channel_of_url: open_channel
()
;;
let parse debug namespaces iso88591 helpers filename =
try
(* Parse the document: *)
let mng =
if namespaces then
Some (new Pxp_dtd.namespace_manager)
else
None
in
let resolver =
let file_resolver =
new Pxp_reader.resolve_as_file() in
new Pxp_reader.combine (helpers @ [file_resolver])
in
let start_id =
System filename in
let config =
{ default_config with
debugging_mode = debug;
encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
idref_pass = true;
enable_namespace_processing = mng;
warner = new warner
} in
let entmng =
create_entity_manager config (ExtID(start_id, resolver)) in
let pullparse =
create_pull_parser
config
(`Entry_document [])
entmng in
let x = ref (pullparse()) in
while !x <> None do
x := pullparse()
done;
()
with
e ->
(* Print error; remember that there was an error *)
error_happened := true;
print_error e;
(* raise e *)
;;
let main() =
let debug = ref false in
let namespaces = ref false in
let iso88591 = ref false in
let helpers = ref [] in
let files = ref [] in
let eq_split s =
let eq =
try String.index s '='
with Not_found -> raise(Arg.Bad "Syntax error")
in
let before_eq = String.sub s 0 eq in
let after_eq = String.sub s (eq+1) (String.length s - eq - 1) in
(before_eq, after_eq)
in
let add_helper sends_mime_header s =
let scheme,cmd = eq_split s in
let h = resolve_by_helper scheme cmd sends_mime_header in
helpers := !helpers @ [h]
in
let add_pubid s =
let pubid,filename = eq_split s in
let h = Pxp_reader.lookup_public_id_as_file [pubid,filename] in
helpers := !helpers @ [h]
in
let add_sysid s =
let sysid,filename = eq_split s in
let h = Pxp_reader.lookup_system_id_as_file [sysid,filename] in
helpers := !helpers @ [h]
in
Arg.parse
[ "-d", Arg.Set debug,
" turn debugging mode on";
"-namespaces", Arg.Set namespaces,
" enable namespace support";
"-iso-8859-1", Arg.Set iso88591,
" use ISO-8859-1 as internal encoding instead of UTF-8";
"-helper", Arg.String (add_helper false),
"scheme=cmd add this helper command";
"-helper-mh", Arg.String (add_helper true),
"scheme=cmd add this helper command (which sends mime headers)";
"-pubid", Arg.String add_pubid,
"id=file map this PUBLIC id to this file";
"-sysid", Arg.String add_sysid,
"id=file map this SYSTEM id to this file";
]
(fun x -> files := x :: !files)
"
usage: pxpevparse [options] URL ...
- parses XML documents in well-formedness mode. See below for list of options.
<title>PXP - The XML parser for Objective Caml</title>
List of options:";
files := List.rev !files;
List.iter (parse !debug !namespaces !iso88591 !helpers) !files;
;;
main();
if !error_happened then exit(1);;
This diff is collapsed.
<!-- DTD for Shakespeare J. Bosak 1994.03.01, 1997.01.02 -->
<!-- Revised for case sensitivity 1997.09.10 -->
<!-- Revised for XML 1.0 conformity 1998.01.27 (thanks to Eve Maler) -->
<!ENTITY amp "&#38;#38;">
<!ELEMENT PLAY (TITLE, FM, PERSONAE, SCNDESCR, PLAYSUBT, INDUCT?,
PROLOGUE?, ACT+, EPILOGUE?)>
<!ELEMENT TITLE (#PCDATA)>
<!ELEMENT FM (P+)>
<!ELEMENT P (#PCDATA)>
<!ELEMENT PERSONAE (TITLE, (PERSONA | PGROUP)+)>
<!ELEMENT PGROUP (PERSONA+, GRPDESCR)>
<!ELEMENT PERSONA (#PCDATA)>
<!ELEMENT GRPDESCR (#PCDATA)>
<!ELEMENT SCNDESCR (#PCDATA)>
<!ELEMENT PLAYSUBT (#PCDATA)>
<!ELEMENT INDUCT (TITLE, SUBTITLE*, (SCENE+|(SPEECH|STAGEDIR|SUBHEAD)+))>
<!ELEMENT ACT (TITLE, SUBTITLE*, PROLOGUE?, SCENE+, EPILOGUE?)>
<!ELEMENT SCENE (TITLE, SUBTITLE*, (SPEECH | STAGEDIR | SUBHEAD)+)>
<!ELEMENT PROLOGUE (TITLE, SUBTITLE*, (STAGEDIR | SPEECH)+)>
<!ELEMENT EPILOGUE (TITLE, SUBTITLE*, (STAGEDIR | SPEECH)+)>
<!ELEMENT SPEECH (SPEAKER+, (LINE | STAGEDIR | SUBHEAD)+)>
<!ELEMENT SPEAKER (#PCDATA)>
<!ELEMENT LINE (#PCDATA | STAGEDIR)*>
<!ELEMENT STAGEDIR (#PCDATA)>
<!ELEMENT SUBTITLE (#PCDATA)>
<!ELEMENT SUBHEAD (#PCDATA)>
(* $Id$
* ----------------------------------------------------------------------
*
*)
open Pxp_document;;
open Pxp_yacc;;
open Pxp_types;;
let error_happened = ref false;;
let print_error e =
print_endline (string_of_exn e)
;;
class warner =
object
method warn w =
print_endline ("WARNING: " ^ w)
end
;;
let resolve_by_helper scheme program sends_mime_header =
let url_syntax =
{ Neturl.ip_url_syntax with
Neturl.url_accepts_8bits = true
}
in
let get_url id =
(* Only accept SYSTEM Ids with the right scheme: *)
match id with
System sysid ->
( try
let sysid_scheme =
try Neturl.extract_url_scheme sysid
with Neturl.Malformed_URL -> scheme
(* If no scheme found: assume our own scheme *)
in
if sysid_scheme = scheme then
Neturl.url_of_string url_syntax sysid (* or Malformed_URL *)
else
raise Pxp_reader.Not_competent
with
(* If the URL is syntactically wrong, do not accept it: *)
Neturl.Malformed_URL -> raise Pxp_reader.Not_competent
)
| _ ->
raise Pxp_reader.Not_competent
in
let read_mime_header ch =
let empty_re = Str.regexp "^[ \t\r\n]*$" in
let is_empty s = Str.string_match empty_re s 0 in
let buffer = Buffer.create 1024 in
let line = ref(input_line ch) in
if String.length !line >= 6 && String.sub !line 0 5 = "HTTP/" then
line := input_line ch;
while not (is_empty !line) do
Buffer.add_string buffer !line;
Buffer.add_string buffer "\n";
line := input_line ch
done;
Buffer.add_string buffer "\n";
Buffer.contents buffer
in
let open_channel id url =
let url_string = Neturl.string_of_url url in
let command = program ^ " " ^ Filename.quote url_string in
let ch = Unix.open_process_in command in
if sends_mime_header then
let header_string = read_mime_header ch in
let header_alist,_ = Mimestring.scan_header
header_string 0 (String.length header_string) in
let content_type =
try List.assoc "content-type" header_alist
with Not_found -> "application/octet-stream" in
let mime_type, mime_type_params =
Mimestring.scan_mime_type content_type [] in
let encoding =
try Some(Netconversion.encoding_of_string
(List.assoc "charset" mime_type_params))
with Not_found -> None in
ch, encoding
else
ch, None
in
let close_channel ch =
match Unix.close_process_in ch with
Unix.WEXITED 0 ->
()
| Unix.WEXITED n ->
failwith("Command terminated with exit code " ^ string_of_int n)
| Unix.WSIGNALED n ->
failwith("Command terminated by signal " ^ string_of_int n)
| _ -> assert false
in
new Pxp_reader.resolve_read_url_channel
~close:close_channel
~url_of_id: get_url
~channel_of_url: open_channel
()
;;
let parse debug wf namespaces iso88591 helpers filename =
try
(* Parse the document: *)
let parse_fn =
if wf then parse_wfdocument_entity ?transform_dtd:None
else
let index = new hash_index in
parse_document_entity
?transform_dtd:None
~id_index:(index :> 'ext index)
in
let mng =
if namespaces then
Some (new Pxp_dtd.namespace_manager)
else
None
in
let resolver =
let file_resolver =
new Pxp_reader.resolve_as_file() in
new Pxp_reader.combine (helpers @ [file_resolver])
in
let start_id =
System filename in
let doc =
parse_fn
{ default_config with
debugging_mode = debug;
encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
idref_pass = true;
enable_namespace_processing = mng;
warner = new warner
}
(ExtID(start_id, resolver))
default_spec
in
()
with
e ->
(* Print error; remember that there was an error *)
error_happened := true;
print_error e;
(* raise e *)
;;
let main() =
let debug = ref false in
let wf = ref false in
let namespaces = ref false in
let iso88591 = ref false in
let helpers = ref [] in
let files = ref [] in
let eq_split s =
let eq =
try String.index s '='
with Not_found -> raise(Arg.Bad "Syntax error")
in
let before_eq = String.sub s 0 eq in
let after_eq = String.sub s (eq+1) (String.length s - eq - 1) in
(before_eq, after_eq)
in
let add_helper sends_mime_header s =
let scheme,cmd = eq_split s in
let h = resolve_by_helper scheme cmd sends_mime_header in
helpers := !helpers @ [h]
in
let add_pubid s =
let pubid,filename = eq_split s in
let h = Pxp_reader.lookup_public_id_as_file [pubid,filename] in
helpers := !helpers @ [h]
in
let add_sysid s =
let sysid,filename = eq_split s in
let h = Pxp_reader.lookup_system_id_as_file [sysid,filename] in
helpers := !helpers @ [h]
in
Arg.parse
[ "-d", Arg.Set debug,
" turn debugging mode on";
"-wf", Arg.Set wf,
" check only for well-formedness";
"-namespaces", Arg.Set namespaces,
" enable namespace support";
"-iso-8859-1", Arg.Set iso88591,
" use ISO-8859-1 as internal encoding instead of UTF-8";
"-helper", Arg.String (add_helper false),
"scheme=cmd add this helper command";
"-helper-mh", Arg.String (add_helper true),
"scheme=cmd add this helper command (which sends mime headers)";
"-pubid", Arg.String add_pubid,
"id=file map this PUBLIC id to this file";
"-sysid", Arg.String add_sysid,
"id=file map this SYSTEM id to this file";
]
(fun x -> files := x :: !files)
"
usage: pxpvalidate [options] URL ...
- checks the validity of XML documents. See below for list of options.
<title>PXP - The XML parser for Objective Caml</title>
List of options:";
files := List.rev !files;
List.iter (parse !debug !wf !namespaces !iso88591 !helpers) !files;
;;
main();
if !error_happened then exit(1);;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment