Commit 4688b3b0 authored by gerd's avatar gerd

Support for ulex.


git-svn-id: https://godirepo.camlcity.org/svn/lib-pxp/trunk@667 dbe99aee-44db-0310-b2b3-d33182c8eb97
parent c5a2c2d3
......@@ -4,7 +4,7 @@ include $(TOP_DIR)/Makefile.rules
all: lexpp$(EXEC_SUFFIX)
DEPOBJS = lexpp_file.ml uni_lexer.ml uni_parser.ml uni_parser.mli \
uni_types.ml ucs2_to_utf8.ml main.ml
uni_types.ml ucs2_to_utf8.ml main.ml mll_lexer.ml
OBJS = uni_types.cmo uni_lexer.cmo uni_parser.cmo lexpp_file.cmo \
mll_lexer.cmo \
......@@ -12,16 +12,16 @@ OBJS = uni_types.cmo uni_lexer.cmo uni_parser.cmo lexpp_file.cmo \
PACKAGES=netstring
uni_lexer.ml:
uni_lexer.ml: uni_lexer.mll
$(OCAMLLEX) uni_lexer.mll
mll_lexer.ml:
mll_lexer.ml: mll_lexer.mll
$(OCAMLLEX) mll_lexer.mll
uni_parser.ml:
uni_parser.ml: uni_parser.mly
$(OCAMLYACC) uni_parser.mly
uni_parser.mli:
uni_parser.mli: uni_parser.mly
$(OCAMLYACC) uni_parser.mly
depend: $(DEPOBJS)
......
......@@ -103,3 +103,40 @@ let print_definition out { Uni_types.id = id ; Uni_types.rel = rel } =
output_string out "\n\n"
;;
(**********************************************************************)
(* print a definition in the format expected by ulex: *)
(**********************************************************************)
let rec print_ulex_disjunction ?(first = true) out =
function
[] ->
if first then output_string out " ['b'-'a' (*empty*) ] "
| he::tl ->
if not first then output_string out " | " ;
print_ulex_re out he ;
print_ulex_disjunction ~first:false out tl
and print_ulex_re out =
function
Uni_types.Char i -> output_string out (string_of_int i)
| Uni_types.Interval (l,u) ->
output_string out ("[" ^ string_of_int l ^ "-" ^
string_of_int u ^ "]")
| Uni_types.Identifier i -> output_string out i
| Uni_types.Concat rell ->
let foo rel =
if List.length rel > 1 then
(output_string out "(" ; print_ulex_disjunction out rel ;
output_string out ")")
else
print_ulex_disjunction out rel
in
List.iter foo rell
;;
let print_ulex_definition out { Uni_types.id = id ; Uni_types.rel = rel } =
output_string out ("let regexp " ^ id ^ " =\n ") ;
print_ulex_disjunction out rel ;
output_string out "\n\n"
;;
This diff is collapsed.
(* $Id$ *)
{ let rec recurse_until stop_tok f lexbuf =
{
type mlltok =
[ `Brace of Lexing.position * mlltok list
| `Bracket of mlltok list
| `Char of char
| `Charliteral of string
| `Comment of mlltok list
| `EOF
| `E_Brace
| `E_Bracket
| `E_Comment
| `E_Paren
| `Ident of string
| `Paren of mlltok list
| `Stringliteral of string
| `Sep of string (* "" or white space string *)
]
let rec recurse_until stop_tok f lexbuf =
let tok = f lexbuf in
if tok = stop_tok then
[]
......@@ -14,35 +32,29 @@
let ident_start = [ 'A'-'Z' 'a'-'z' '_' ]
let ident_rest = [ 'A'-'Z' 'a'-'z' '_' '\'' '0'-'9' ]
rule definition want_ws = parse
[' ' '\013' '\009' '\012' '\010' ]
rule definition = parse
[' ' '\013' '\009' '\012' '\010' ]+
{ let s = Lexing.lexeme lexbuf in
if want_ws then
`WS s.[0]
else
definition want_ws lexbuf
`Sep s
}
| "(*"
{ let r =
`Comment (recurse_until `E_Comment (definition true) lexbuf) in
if want_ws then
r
else
definition want_ws lexbuf
`Comment (recurse_until `E_Comment definition lexbuf) in
r
}
| "*)"
{ `E_Comment }
| '{'
{ let p = Lexing.lexeme_start_p lexbuf in
`Brace (p, recurse_until `E_Brace (definition true) lexbuf) }
`Brace (p, recurse_until `E_Brace definition lexbuf) }
| '}'
{ `E_Brace }
| '('
{ `Paren (recurse_until `E_Paren (definition want_ws) lexbuf) }
{ `Paren (recurse_until `E_Paren definition lexbuf) }
| ')'
{ `E_Paren }
| '['
{ `Bracket (recurse_until `E_Bracket (definition want_ws) lexbuf) }
{ `Bracket (recurse_until `E_Bracket definition lexbuf) }
| ']'
{ `E_Bracket }
| '"'
......@@ -57,6 +69,7 @@ rule definition want_ws = parse
| '\'' '\\' ( [ 'n' 't' 'b' 'r' '"' '\\' '\'' ] |
( ['0'-'9'] ['0'-'9'] ['0'-'9'] ) |
( 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] ) )
'\''
{ let s = Lexing.lexeme lexbuf in
`Charliteral (String.sub s 1 (String.length s - 2))
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment