Commit 37fe12a4 authored by Pascal J. Bourguignon's avatar Pascal J. Bourguignon

c11-scanner: cleaned up tokens; c11-parser: removed bugs, added actions.

parent 99f66026
This diff is collapsed.
...@@ -34,19 +34,22 @@ ...@@ -34,19 +34,22 @@
(in-package "COM.INFORMATIMAGO.LANGUAGES.C11.SCANNER") (in-package "COM.INFORMATIMAGO.LANGUAGES.C11.SCANNER")
(define-scanner c11-scanner (define-scanner c11-scanner
;; This scanner is not used by c11-parser, but by read-yacc.
:terminals ( :terminals (
"!" "!=" "%" "%=" "%>" "&" "&&" "&=" "(" ")" "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
"*" "*=" "+" "++" "+=" "," "-" "--" "-=" "->" "." "..." "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
"/" "/=" ":" ":>" ";" "<" "<%" "<:" "<<" "<<=" "<=" "=" "_Thread_local" "auto" "break" "case" "char" "const"
"==" ">" ">=" ">>" ">>=" "?" "[" "]" "^" "^=" "_Bool" "continue" "default" "do" "double" "else" "enum" "extern"
"_Complex" "_Imaginary" "__asm__" "__builtin_va_list" "float" "for" "goto" "if" "inline" "int" "long" "register"
"__const" "__inline" "__inline__" "__restrict" "asm"
"auto" "break" "case" "char" "const" "continue"
"default" "do" "double" "else" "enum" "extern" "float"
"for" "goto" "if" "inline" "int" "long" "register"
"restrict" "return" "short" "signed" "sizeof" "static" "restrict" "return" "short" "signed" "sizeof" "static"
"struct" "switch" "typedef" "union" "unsigned" "void" "struct" "switch" "typedef" "union" "unsigned" "void"
"volatile" "while" "{" "|" "|=" "||" "}" "~" "~=" "volatile" "while"
"^=" "|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%="
"+=" "->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")"
"," ":" ";" "." "..." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!"
"%" "<" ">" "=" "^" "|" "?"
(identifier "[a-zA-Z_$][a-zA-Z_$0-9]*") (identifier "[a-zA-Z_$][a-zA-Z_$0-9]*")
(hex "0[xX][0-9A-Fa-f]+[uUlL]*") (hex "0[xX][0-9A-Fa-f]+[uUlL]*")
(oct "0[0-7]+[uUlL]*") (oct "0[0-7]+[uUlL]*")
...@@ -68,57 +71,77 @@ ...@@ -68,57 +71,77 @@
'string))) 'string)))
(defparameter *parser-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.PARSER"))
"Package where the token kinds are interned.")
(defparameter *symbol-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C"))
"Package where the identifiers of the C program are interned.")
(defparameter *c11-literal-tokens* (defparameter *c11-literal-tokens*
'("!" "!=" "%" "%=" "%>" "&" "&&" "&=" "(" ")" '("_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
"*" "*=" "+" "++" "+=" "," "-" "--" "-=" "->" "." "..." "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
"/" "/=" ":" ":>" ";" "<" "<%" "<:" "<<" "<<=" "<=" "=" "_Thread_local" "auto" "break" "case" "char" "const"
"==" ">" ">=" ">>" ">>=" "?" "[" "]" "^" "^=" "_Bool" "continue" "default" "do" "double" "else" "enum" "extern"
"_Complex" "_Imaginary" "__asm__" "__builtin_va_list" "float" "for" "goto" "if" "inline" "int" "long" "register"
"__const" "__inline" "__inline__" "__restrict" "asm"
"auto" "break" "case" "char" "const" "continue"
"default" "do" "double" "else" "enum" "extern" "float"
"for" "goto" "if" "inline" "int" "long" "register"
"restrict" "return" "short" "signed" "sizeof" "static" "restrict" "return" "short" "signed" "sizeof" "static"
"struct" "switch" "typedef" "union" "unsigned" "void" "struct" "switch" "typedef" "union" "unsigned" "void"
"volatile" "while" "{" "|" "|=" "||" "}" "~" "~=")) "volatile" "while"
"^=" "|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%="
"+=" "->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")"
"," ":" ";" "." "..." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!"
"%" "<" ">" "=" "^" "|" "?"))
(defparameter *c11-literal-tokens-map* (defparameter *c11-literal-tokens-map*
(let ((table (make-hash-table :test 'equal))) (load-time-value
(dolist (token *c11-literal-tokens* table) (let ((table (make-hash-table :test 'equal)))
(setf (gethash token table) (intern token))))) (dolist (token *c11-literal-tokens* table)
(setf (gethash token table)
(let ((name (string-upcase token)))
(when (char= (aref name 0) #\_)
(setf name (substitute #\- #\_ (subseq name 1))))
(intern name *parser-package*)))))))
(defparameter *c11-regexp-tokens* (defparameter *c11-regexp-tokens*
;; order matters ;; order matters
'((|string_literal| '((string-literal
(str "L?\"(\\.|[^\\\"])*\"")) (str "^L?\"(\\.|[^\\\"])*\"$"))
(|i_constant| (i-constant
(lchar "L?'(\\.|[^\\'])+'")) (lchar "^L?'(\\.|[^\\'])+'$"))
(|identifier| (identifier
(identifier "[a-zA-Z_$][a-zA-Z_$0-9]*")) (identifier "^[a-zA-Z_$][a-zA-Z_$0-9]*$"))
(|f_constant| (f-constant
(flt1 "[0-9]+[Ee][-+]?[0-9]+[fFlL]?") (flt1 "^[0-9]+[Ee][-+]?[0-9]+[fFlL]?$")
(flt2 "[0-9]*\\.[0-9]+([Ee][-+]?[0-9]+)?[fFlL]?") (flt2 "^[0-9]*\\.[0-9]+([Ee][-+]?[0-9]+)?[fFlL]?$")
(flt3 "[0-9]+\\.[0-9]*([Ee][-+]?[0-9]+)?[fFlL]?")) (flt3 "^[0-9]+\\.[0-9]*([Ee][-+]?[0-9]+)?[fFlL]?$"))
(|i_constant| (i-constant
(hex "0[xX][0-9A-Fa-f]+[uUlL]*") (hex "^0[xX][0-9A-Fa-f]+[uUlL]*$")
(oct "0[0-7]+[uUlL]*") (oct "^0[0-7]+[uUlL]*$")
(dec "[0-9]+[uUlL]*")))) (dec "^[0-9]+[uUlL]*$"))))
(defvar *context*) (defvar *context*)
(defun compute-token-kind (token)
(let ((text (token-text token))) (defun upgrade-c11-token (token)
(or (gethash text *c11-literal-tokens-map*) (let* ((text (token-text token))
(literal (gethash text *c11-literal-tokens-map*)))
(if literal
(setf (token-kind token) literal)
(let ((kind (first (find-if (lambda (entry) (let ((kind (first (find-if (lambda (entry)
(some (lambda (regexp) (some (lambda (regexp)
(string-match (format nil "^~A$" (second regexp)) text)) (string-match (second regexp) text))
(rest entry))) (rest entry)))
*c11-regexp-tokens*)))) *c11-regexp-tokens*))))
(if (eq kind '|identifier|) (if (eq kind 'identifier)
(cond (setf (token-symbol token) (intern (token-text token) *symbol-package*)
((typedef-name-p *context* token) '|typedef_name|) (token-kind token) (cond
((function-name-p *context* token) '|func_name|) ((typedef-name-p *context* token) 'typedef-name)
((enumeration-constant-name-p *context* token) '|enumeration_constant|) ((function-name-p *context* token) 'func-name)
(t kind)) ((enumeration-constant-name-p *context* token) 'enumeration-constant)
kind))))) (t 'identifier)))
(setf (token-kind token) kind))))
token))
;;;; THE END ;;;; ;;;; THE END ;;;;
...@@ -75,12 +75,12 @@ The scanner uses it to detect enumeration_constant tokens." ...@@ -75,12 +75,12 @@ The scanner uses it to detect enumeration_constant tokens."
(defun identifier-in-table-p (context table name) (defun identifier-in-table-p (context table name)
(declare (ignore context)) (declare (ignore context))
(and (eq '|identifier| (token-kind name)) (and (eq 'identifier (token-kind name))
(gethash (token-symbol name) table))) (gethash (token-symbol name) table)))
(defun enter-into-table (context table kind name definition) (defun enter-into-table (context table kind name definition)
(declare (ignore context kind)) (declare (ignore context kind))
(assert (eq '|identifier| (token-kind name)) (name)) (assert (eq 'identifier (token-kind name)) (name))
(setf (gethash (token-symbol name) table) definition)) (setf (gethash (token-symbol name) table) definition))
(defmethod typedef-name-p ((context context) token) (defmethod typedef-name-p ((context context) token)
...@@ -94,15 +94,15 @@ The scanner uses it to detect enumeration_constant tokens." ...@@ -94,15 +94,15 @@ The scanner uses it to detect enumeration_constant tokens."
(defgeneric enter-typedef (context name &optional definition) (defgeneric enter-typedef (context name &optional definition)
(:method ((context context) name &optional (definition t)) (:method ((context context) name &optional (definition t))
(enter-into-table context (context-typedefs context) '|typedef_name| name definition))) (enter-into-table context (context-typedefs context) 'typedef-name name definition)))
(defgeneric enter-function (context name &optional definition) (defgeneric enter-function (context name &optional definition)
(:method ((context context) name &optional (definition t)) (:method ((context context) name &optional (definition t))
(enter-into-table context (context-functions context) '|func_name| name definition))) (enter-into-table context (context-functions context) 'func-name name definition)))
(defgeneric enter-enumeration-constant (context name &optional definition) (defgeneric enter-enumeration-constant (context name &optional definition)
(:method ((context context) name &optional (definition t)) (:method ((context context) name &optional (definition t))
(enter-into-table context (context-enumeration-constants context) '|enum_name| name definition))) (enter-into-table context (context-enumeration-constants context) 'enum-name name definition)))
;;;; THE END ;;;; ;;;; THE END ;;;;
...@@ -36,19 +36,30 @@ ...@@ -36,19 +36,30 @@
(:use) (:use)
(:import-from "COMMON-LISP" (:import-from "COMMON-LISP"
"*" ">=" "/" "-" "++" "+" ">" "=" "<" "<=" "/=") "*" ">=" "/" "-" "++" "+" ">" "=" "<" "<=" "/=")
(:export "identifier" "typedef_name" "func_name" "string_literal" (:import-from "COM.INFORMATIMAGO.LANGUAGES.CPP"
"i_constant" "f_constant" "enum_name" "alignas" "alignof" "IDENTIFIER" "STRING-LITERAL"
"atomic" "generic" "noreturn" "static_assert" ;; "TYPEDEF-NAME" "FUNC-NAME"
"thread_local" "case" "default" "if" "else" "switch" ;; "I-CONSTANT" "F-CONSTANT" "ENUM-NAME"
"while" "do" "for" "goto" "continue" "break" "return" )
"struct" "union" "enum" "..." "complex" "imaginary" "bool" (:export "IDENTIFIER" "TYPEDEF-NAME" "FUNC-NAME" "STRING-LITERAL"
"char" "short" "int" "long" "signed" "unsigned" "float" "I-CONSTANT" "F-CONSTANT" "ENUM-NAME"
"double" "void" "const" "restrict" "volatile" "typedef" "STAR"
"extern" "static" "auto" "register" "inline" "sizeof" "^=" ;; -
"|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%=" "+="
"->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")" "," "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
":" ";" "." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!" "%" "_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
"<" ">" "=" "^" "|" "?" "STAR") "_Thread_local" "auto" "break" "case" "char" "const"
"continue" "default" "do" "double" "else" "enum" "extern"
"float" "for" "goto" "if" "inline" "int" "long" "register"
"restrict" "return" "short" "signed" "sizeof" "static"
"struct" "switch" "typedef" "union" "unsigned" "void"
"volatile" "while"
"^=" "|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%="
"+=" "->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")"
"," ":" ";" "." "..." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!"
"%" "<" ">" "=" "^" "|" "?")
(:documentation "This package exports the token-kinds of the C11 terminal symbols.")) (:documentation "This package exports the token-kinds of the C11 terminal symbols."))
(defpackage "COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT" (defpackage "COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT"
...@@ -71,11 +82,12 @@ ...@@ -71,11 +82,12 @@
"COM.INFORMATIMAGO.COMMON-LISP.PARSER.SCANNER" "COM.INFORMATIMAGO.COMMON-LISP.PARSER.SCANNER"
"COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP" "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP"
"COM.INFORMATIMAGO.LANGUAGES.C11.TOKENS" "COM.INFORMATIMAGO.LANGUAGES.C11.TOKENS"
"COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT") "COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT"
"COM.INFORMATIMAGO.LANGUAGES.CPP")
(:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP" (:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP"
"SPLIT-STRING") "SPLIT-STRING")
(:export "C11-SCANNER" (:export "C11-SCANNER"
"COMPUTE-TOKEN-KIND")) "UPGRADE-C11-TOKEN"))
(defpackage "COM.INFORMATIMAGO.LANGUAGES.YACC.PARSER" (defpackage "COM.INFORMATIMAGO.LANGUAGES.YACC.PARSER"
(:use "COMMON-LISP" (:use "COMMON-LISP"
...@@ -99,7 +111,8 @@ returning a yacc:defgrammar form. ...@@ -99,7 +111,8 @@ returning a yacc:defgrammar form.
(defpackage "COM.INFORMATIMAGO.LANGUAGES.C11.PARSER" (defpackage "COM.INFORMATIMAGO.LANGUAGES.C11.PARSER"
(:use "CL-STEPPER"; "COMMON-LISP" (:use ;; "CL-STEPPER"
"COMMON-LISP"
"COM.INFORMATIMAGO.RDP" "COM.INFORMATIMAGO.RDP"
"COM.INFORMATIMAGO.COMMON-LISP.CESARUM.LIST" "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.LIST"
"COM.INFORMATIMAGO.COMMON-LISP.CESARUM.STREAM" "COM.INFORMATIMAGO.COMMON-LISP.CESARUM.STREAM"
...@@ -110,7 +123,8 @@ returning a yacc:defgrammar form. ...@@ -110,7 +123,8 @@ returning a yacc:defgrammar form.
"COM.INFORMATIMAGO.LANGUAGES.CPP" "COM.INFORMATIMAGO.LANGUAGES.CPP"
"COM.INFORMATIMAGO.TOOLS.READER-MACRO" "COM.INFORMATIMAGO.TOOLS.READER-MACRO"
"COM.INFORMATIMAGO.LANGUAGES.C11.TOKENS" "COM.INFORMATIMAGO.LANGUAGES.C11.TOKENS"
"COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT") "COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT"
"COM.INFORMATIMAGO.LANGUAGES.C11.SCANNER")
(:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP" (:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP"
"SPLIT-STRING") "SPLIT-STRING")
(:export "C11-PARSER")) (:export "C11-PARSER"))
......
...@@ -6,23 +6,22 @@ ...@@ -6,23 +6,22 @@
;; (untrace compute-token-kind) ;; (untrace compute-token-kind)
;; 7 seconds. ;; 7 seconds.
(defparameter *tc* (defparameter *tc* (reduce (function append)
(let ((tokens (reduce (function append)
(reverse (com.informatimago.languages.cpp::context-output-lines (reverse (com.informatimago.languages.cpp::context-output-lines
(let ((*identifier-package* (let ((*identifier-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C")))) (load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C"))))
(cpp-e "/Users/pjb/src/public/lisp/languages/cpp/tests/emacs.c" (cpp-e "/Users/pjb/src/public/lisp/languages/cpp/tests/emacs.c"
:trace-includes t :trace-includes t
:defines '("__GNUC__" "4" "__STDC__" "1" "__x86_64__" "1") :defines '(;; "__GNUC__" "4"
"__STDC__" "1"
"__x86_64__" "1")
:includes '("/Users/pjb/src/macosx/emacs-24.5/src/") :includes '("/Users/pjb/src/macosx/emacs-24.5/src/")
:include-bracket-directories '("/Users/pjb/src/macosx/emacs-24.5/src/" :include-bracket-directories '("/Users/pjb/src/macosx/emacs-24.5/src/"
"/Users/pjb/src/macosx/emacs-24.5/lib/" "/Users/pjb/src/macosx/emacs-24.5/lib/"
"/Users/pjb/src/macosx/gcc-4.9.2/gcc/ginclude/" "/Users/pjb/src/macosx/gcc-4.9.2/gcc/ginclude/"
"/usr/include/") "/usr/include/")
:write-processed-lines nil)))) :write-processed-lines nil))))
:initial-value '()))) :initial-value '()))
(dolist (token tokens tokens)
(setf (token-kind token) (com.informatimago.languages.c11.scanner:compute-token-kind token)))))
(with-open-file (out "p.lisp" :direction :output :if-exists :supersede :if-does-not-exist :create) (with-open-file (out "p.lisp" :direction :output :if-exists :supersede :if-does-not-exist :create)
...@@ -32,17 +31,38 @@ ...@@ -32,17 +31,38 @@
(pprint grammar out)) (pprint grammar out))
(dolist (form rest) (dolist (form rest)
(pprint form out)))) (pprint form out))))
(load "p.lisp")
(defvar *scanner* nil) (defvar *scanner* nil)
(defun test/parse-stream (tokens) (defun test/parse-stream (&optional tokens)
(declare (stepper disable)) (declare (stepper disable))
(let ((*scanner* (make-instance 'pre-scanned-scanner :tokens tokens)) (let ((tokens (or tokens
(*context* (make-instance 'context))) (reduce (function append)
(loop (reverse (com.informatimago.languages.cpp::context-output-lines
:until (scanner-end-of-source-p *scanner*) (let ((*identifier-package*
:collect (handler-bind ((parser-end-of-source-not-reached #'continue)) (load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C"))))
(parse-c11 *scanner*))))) (cpp-e "/Users/pjb/src/public/lisp/languages/cpp/tests/emacs.c"
:trace-includes t
:defines '(;; "__GNUC__" "4"
"__STDC__" "1"
"__x86_64__" "1")
:includes '("/Users/pjb/src/macosx/emacs-24.5/src/")
:include-bracket-directories '("/Users/pjb/src/macosx/emacs-24.5/src/"
"/Users/pjb/src/macosx/emacs-24.5/lib/"
"/Users/pjb/src/macosx/gcc-4.9.2/gcc/ginclude/"
"/usr/include/")
:write-processed-lines nil))))
:initial-value '()))))
(setf *tc* tokens)
(let ((*scanner* (make-instance 'pre-scanned-scanner :tokens tokens))
(*context* (make-instance 'context)))
(loop
:until (scanner-end-of-source-p *scanner*)
:collect (handler-bind ((parser-end-of-source-not-reached #'continue))
(parse-c11 *scanner*))))))
(step (test/parse-stream *tc*) :trace) (step (test/parse-stream *tc*) :trace)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment