improved escaping in regular expressions

parent 605f9e2a
(define-module (language python module re parser)
#:use-module (language python guilemod)
#:use-module (parser stis-parser)
#:use-module (ice-9 unicode)
#:export(parse-reg e-matcher pretty))
;; byte-special: parser for the part of a byte-style escape that follows the
;; backslash.  Each visible alternative matches one escape letter and produces
;; an integer code; f-esc consumes that integer and converts it with
;; integer->char.
;; NOTE(review): as shown here the alternatives are not wrapped in any
;; combinator and the form is never closed -- presumably an `(f-or! ...`
;; wrapper and the closing lines were lost from this excerpt; confirm against
;; the repository before relying on this text.
(define byte-special
(let ()
;; A single hexadecimal digit.
(define fx (f-reg! "[0-9a-fA-F]"))
;; A single octal digit.
(define fo (f-reg! "[0-7]"))
;; Two hex digits, converted from the matched text with base 16.
(define he (mk-token (f-seq fx fx) (lambda (x) (string->number x 16))))
;; Three octal digits, converted with base 8.
;; NOTE(review): `oc` is not referenced in the lines visible here --
;; presumably used by an alternative that is missing from this excerpt.
(define oc (mk-token (f-seq fo fo fo) (lambda (x) (string->number x 8))))
;; Letter escapes and the ASCII codes they stand for:
;; a -> 7 (BEL), b -> 8 (BS), f -> 12 (FF), n -> 10 (LF),
;; r -> 13 (CR), t -> 9 (HT), v -> 11 (VT).
(f-seq (f-tag "a") (f-out 7))
(f-seq (f-tag "b") (f-out 8))
(f-seq (f-tag "f") (f-out 12))
(f-seq (f-tag "n") (f-out 10))
(f-seq (f-tag "r") (f-out 13))
(f-seq (f-tag "t") (f-out 9))
(f-seq (f-tag "v") (f-out 11))
;; "x" followed by exactly two hex digits; the value comes from `he`.
(f-seq (f-tag "x") he)
;; f-esc: parser for a backslash escape that yields a character.
;; After the backslash the visible alternatives are:
;;   - a byte-special escape, whose integer result goes through integer->char
;;   - N{name}    -- a character looked up by name with formal-name->char
;;   - uXXXX      -- four hex digits
;;   - UXXXXXXXX  -- eight hex digits
;;   - any single character matched by the regex "."
;; NOTE(review): the last line closes two more forms than the visible lines
;; open -- presumably `(f-or! ...` wrapper lines were lost from this excerpt;
;; confirm the exact nesting (and hence the order the alternatives are tried
;; in) against the repository.
(define f-esc
(let ()
;; A single hexadecimal digit.
(define fi (f-reg! "[0-9a-fA-F]"))
;; Exactly four hex digits, converted from the matched text with base 16.
(define he4 (mk-token (f-seq fi fi fi fi)
(lambda (x) (string->number x 16))))
;; Exactly eight hex digits, converted from the matched text with base 16.
(define he8 (mk-token (f-seq fi fi fi fi fi fi fi fi)
(lambda (x) (string->number x 16))))
;; "{", then one or more characters other than "}" as a token, then "}".
(define name (f-seq (f-tag "{") (mk-token (f+ (f-not! (f-tag "}"))))
(f-tag "}")))
;; Return x when it is true; otherwise signal an error that names n.
(define (check n x)
(if x x (error (format #f "Wrong unicode name N{~a}" n))))
(f-seq (f-tag "\\")
(f-let ((x byte-special))
(f-out (integer->char x)))
(f-seq (f-tag "N")
(f-let ((n name))
;; `check` turns a false result from formal-name->char into an error.
(f-out (check n (formal-name->char n)))))
(f-seq (f-tag "u")
(f-let ((x he4))
(f-out (integer->char x))))
(f-seq (f-tag "U")
(f-let ((x he8))
(f-out (integer->char x))))
;; Fallback: any one character after the backslash.
;; NOTE(review): what value this alternative yields depends on f-reg!,
;; which is defined outside this excerpt -- confirm.
(f-reg! "."))))))
;; (mk NAME KEY TEXT) expands to a definition of NAME as
;; (f-seq KEY (f-tag TEXT)): KEY sequenced with a parser for the literal TEXT.
(define-syntax mk
  (syntax-rules ()
    ((_ name key text)
     (define name (f-seq key (f-tag text))))))
;; f-. is defined as (f-seq #:dot (f-tag ".")) -- the literal "." under the
;; keyword #:dot.
(mk f-. #:dot ".")
;; f-^ is defined as (f-seq #:^ (f-tag "^")) -- the literal "^" under the
;; keyword #:^.
(mk f-^ #:^ "^")
......@@ -26,9 +75,11 @@
;; anongroup: the non-capturing group form "(?:" <ee> ")", built with f-list
;; under the keyword #:?:.  `ee` is wrapped in Ds.
;; NOTE(review): presumably Ds defers the reference to `ee` so the grammar can
;; be recursive -- confirm against the Ds definition.
(define anongroup (f-list #:?: "(?:" (Ds ee) ")"))
;; namegroup: the named group form "(?P<" <name> ">" <ee> ")", built with
;; f-list under the keyword #:?P<.  The name is a token of one or more
;; characters that are neither ">" nor a space.
(define namegroup
  (let ((group-name (mk-token (f+ (f-not! (f-reg "[> ]"))))))
    (f-list #:?P< "(?P<" group-name ">" (Ds ee) ")")))
;; ch: parser for one pattern character, built with f-list under the
;; keyword #:ch.
;;
;; NOT is a parser for what must not be consumed as an ordinary character;
;; it is used only inside f-not! in the final alternative.
;;
;; The f-or! alternatives, in source order:
;;   1. f-esc                        -- a backslash escape
;;   2. a backslash followed by f-back
;;   3. a single token that does not match NOT
;;
;; The stale pre-change line `(f-or! (f-seq (f-char #\\) f-back)` that sat
;; beside its replacement has been removed: it opened a form that was never
;; closed and repeated alternative 2.
(define (ch not)
  (f-list #:ch
          (f-or! f-esc
                 (f-seq (f-char #\\) f-back)
                 (mk-token (f-not! not)))))
;; number: a token made of one or more digits 0-9; string->number is handed
;; to mk-token as the converter for the matched text.
(define number
  (let ((digits (f+ (f-reg! "[0-9]"))))
    (mk-token digits string->number)))
(define incant (f-list #:?P= "(?P=" (f-or! number
......@@ -58,6 +109,7 @@
(define (bch f) (f-or! (f-seq (f-or! (f-tag "\\n") f-nl)
(f-out (list->string (list #\newline))))
(f-seq (f-char #\\) f-back)
(define bbody (f-cons (f-or!
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment