Commit 37fe12a4 authored by Pascal J. Bourguignon's avatar Pascal J. Bourguignon

c11-scanner: cleaned up tokens; c11-parser: removed bugs, added actions.

parent 99f66026
This diff is collapsed.
......@@ -34,19 +34,22 @@
(in-package "COM.INFORMATIMAGO.LANGUAGES.C11.SCANNER")
(define-scanner c11-scanner
;; This scanner is not used by c11-parser, but by read-yacc.
:terminals (
"!" "!=" "%" "%=" "%>" "&" "&&" "&=" "(" ")"
"*" "*=" "+" "++" "+=" "," "-" "--" "-=" "->" "." "..."
"/" "/=" ":" ":>" ";" "<" "<%" "<:" "<<" "<<=" "<=" "="
"==" ">" ">=" ">>" ">>=" "?" "[" "]" "^" "^=" "_Bool"
"_Complex" "_Imaginary" "__asm__" "__builtin_va_list"
"__const" "__inline" "__inline__" "__restrict" "asm"
"auto" "break" "case" "char" "const" "continue"
"default" "do" "double" "else" "enum" "extern" "float"
"for" "goto" "if" "inline" "int" "long" "register"
"_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
"_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
"_Thread_local" "auto" "break" "case" "char" "const"
"continue" "default" "do" "double" "else" "enum" "extern"
"float" "for" "goto" "if" "inline" "int" "long" "register"
"restrict" "return" "short" "signed" "sizeof" "static"
"struct" "switch" "typedef" "union" "unsigned" "void"
"volatile" "while" "{" "|" "|=" "||" "}" "~" "~="
"volatile" "while"
"^=" "|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%="
"+=" "->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")"
"," ":" ";" "." "..." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!"
"%" "<" ">" "=" "^" "|" "?"
(identifier "[a-zA-Z_$][a-zA-Z_$0-9]*")
(hex "0[xX][0-9A-Fa-f]+[uUlL]*")
(oct "0[0-7]+[uUlL]*")
......@@ -68,57 +71,77 @@
'string)))
(defparameter *parser-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.PARSER"))
"Package where the token kinds are interned.")
(defparameter *symbol-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C"))
"Package where the identifiers of the C program are interned.")
(defparameter *c11-literal-tokens*
'("!" "!=" "%" "%=" "%>" "&" "&&" "&=" "(" ")"
"*" "*=" "+" "++" "+=" "," "-" "--" "-=" "->" "." "..."
"/" "/=" ":" ":>" ";" "<" "<%" "<:" "<<" "<<=" "<=" "="
"==" ">" ">=" ">>" ">>=" "?" "[" "]" "^" "^=" "_Bool"
"_Complex" "_Imaginary" "__asm__" "__builtin_va_list"
"__const" "__inline" "__inline__" "__restrict" "asm"
"auto" "break" "case" "char" "const" "continue"
"default" "do" "double" "else" "enum" "extern" "float"
"for" "goto" "if" "inline" "int" "long" "register"
'("_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
"_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
"_Thread_local" "auto" "break" "case" "char" "const"
"continue" "default" "do" "double" "else" "enum" "extern"
"float" "for" "goto" "if" "inline" "int" "long" "register"
"restrict" "return" "short" "signed" "sizeof" "static"
"struct" "switch" "typedef" "union" "unsigned" "void"
"volatile" "while" "{" "|" "|=" "||" "}" "~" "~="))
"volatile" "while"
"^=" "|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%="
"+=" "->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")"
"," ":" ";" "." "..." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!"
"%" "<" ">" "=" "^" "|" "?"))
(defparameter *c11-literal-tokens-map*
(let ((table (make-hash-table :test 'equal)))
(dolist (token *c11-literal-tokens* table)
(setf (gethash token table) (intern token)))))
(load-time-value
(let ((table (make-hash-table :test 'equal)))
(dolist (token *c11-literal-tokens* table)
(setf (gethash token table)
(let ((name (string-upcase token)))
(when (char= (aref name 0) #\_)
(setf name (substitute #\- #\_ (subseq name 1))))
(intern name *parser-package*)))))))
(defparameter *c11-regexp-tokens*
;; order matters
'((|string_literal|
(str "L?\"(\\.|[^\\\"])*\""))
(|i_constant|
(lchar "L?'(\\.|[^\\'])+'"))
(|identifier|
(identifier "[a-zA-Z_$][a-zA-Z_$0-9]*"))
(|f_constant|
(flt1 "[0-9]+[Ee][-+]?[0-9]+[fFlL]?")
(flt2 "[0-9]*\\.[0-9]+([Ee][-+]?[0-9]+)?[fFlL]?")
(flt3 "[0-9]+\\.[0-9]*([Ee][-+]?[0-9]+)?[fFlL]?"))
(|i_constant|
(hex "0[xX][0-9A-Fa-f]+[uUlL]*")
(oct "0[0-7]+[uUlL]*")
(dec "[0-9]+[uUlL]*"))))
'((string-literal
(str "^L?\"(\\.|[^\\\"])*\"$"))
(i-constant
(lchar "^L?'(\\.|[^\\'])+'$"))
(identifier
(identifier "^[a-zA-Z_$][a-zA-Z_$0-9]*$"))
(f-constant
(flt1 "^[0-9]+[Ee][-+]?[0-9]+[fFlL]?$")
(flt2 "^[0-9]*\\.[0-9]+([Ee][-+]?[0-9]+)?[fFlL]?$")
(flt3 "^[0-9]+\\.[0-9]*([Ee][-+]?[0-9]+)?[fFlL]?$"))
(i-constant
(hex "^0[xX][0-9A-Fa-f]+[uUlL]*$")
(oct "^0[0-7]+[uUlL]*$")
(dec "^[0-9]+[uUlL]*$"))))
(defvar *context*)
(defun compute-token-kind (token)
(let ((text (token-text token)))
(or (gethash text *c11-literal-tokens-map*)
(defun upgrade-c11-token (token)
(let* ((text (token-text token))
(literal (gethash text *c11-literal-tokens-map*)))
(if literal
(setf (token-kind token) literal)
(let ((kind (first (find-if (lambda (entry)
(some (lambda (regexp)
(string-match (format nil "^~A$" (second regexp)) text))
(rest entry)))
(some (lambda (regexp)
(string-match (second regexp) text))
(rest entry)))
*c11-regexp-tokens*))))
(if (eq kind '|identifier|)
(cond
((typedef-name-p *context* token) '|typedef_name|)
((function-name-p *context* token) '|func_name|)
((enumeration-constant-name-p *context* token) '|enumeration_constant|)
(t kind))
kind)))))
(if (eq kind 'identifier)
(setf (token-symbol token) (intern (token-text token) *symbol-package*)
(token-kind token) (cond
((typedef-name-p *context* token) 'typedef-name)
((function-name-p *context* token) 'func-name)
((enumeration-constant-name-p *context* token) 'enumeration-constant)
(t 'identifier)))
(setf (token-kind token) kind))))
token))
;;;; THE END ;;;;
......@@ -75,12 +75,12 @@ The scanner uses it to detect enumeration_constant tokens."
(defun identifier-in-table-p (context table name)
(declare (ignore context))
(and (eq '|identifier| (token-kind name))
(and (eq 'identifier (token-kind name))
(gethash (token-symbol name) table)))
(defun enter-into-table (context table kind name definition)
(declare (ignore context kind))
(assert (eq '|identifier| (token-kind name)) (name))
(assert (eq 'identifier (token-kind name)) (name))
(setf (gethash (token-symbol name) table) definition))
(defmethod typedef-name-p ((context context) token)
......@@ -94,15 +94,15 @@ The scanner uses it to detect enumeration_constant tokens."
(defgeneric enter-typedef (context name &optional definition)
(:method ((context context) name &optional (definition t))
(enter-into-table context (context-typedefs context) '|typedef_name| name definition)))
(enter-into-table context (context-typedefs context) 'typedef-name name definition)))
(defgeneric enter-function (context name &optional definition)
(:method ((context context) name &optional (definition t))
(enter-into-table context (context-functions context) '|func_name| name definition)))
(enter-into-table context (context-functions context) 'func-name name definition)))
(defgeneric enter-enumeration-constant (context name &optional definition)
(:method ((context context) name &optional (definition t))
(enter-into-table context (context-enumeration-constants context) '|enum_name| name definition)))
(enter-into-table context (context-enumeration-constants context) 'enum-name name definition)))
;;;; THE END ;;;;
......@@ -36,19 +36,30 @@
(:use)
(:import-from "COMMON-LISP"
"*" ">=" "/" "-" "++" "+" ">" "=" "<" "<=" "/=")
(:export "identifier" "typedef_name" "func_name" "string_literal"
"i_constant" "f_constant" "enum_name" "alignas" "alignof"
"atomic" "generic" "noreturn" "static_assert"
"thread_local" "case" "default" "if" "else" "switch"
"while" "do" "for" "goto" "continue" "break" "return"
"struct" "union" "enum" "..." "complex" "imaginary" "bool"
"char" "short" "int" "long" "signed" "unsigned" "float"
"double" "void" "const" "restrict" "volatile" "typedef"
"extern" "static" "auto" "register" "inline" "sizeof" "^="
"|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%=" "+="
"->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")" ","
":" ";" "." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!" "%"
"<" ">" "=" "^" "|" "?" "STAR")
(:import-from "COM.INFORMATIMAGO.LANGUAGES.CPP"
"IDENTIFIER" "STRING-LITERAL"
;; "TYPEDEF-NAME" "FUNC-NAME"
;; "I-CONSTANT" "F-CONSTANT" "ENUM-NAME"
)
(:export "IDENTIFIER" "TYPEDEF-NAME" "FUNC-NAME" "STRING-LITERAL"
"I-CONSTANT" "F-CONSTANT" "ENUM-NAME"
"STAR"
;; -
"_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex"
"_Generic" "_Imaginary" "_Noreturn" "_Static_assert"
"_Thread_local" "auto" "break" "case" "char" "const"
"continue" "default" "do" "double" "else" "enum" "extern"
"float" "for" "goto" "if" "inline" "int" "long" "register"
"restrict" "return" "short" "signed" "sizeof" "static"
"struct" "switch" "typedef" "union" "unsigned" "void"
"volatile" "while"
"^=" "|=" "-=" "<<=" ">>=" "&=" "&&" "||" "*=" "/=" "%="
"+=" "->" "++" "--" "<<" ">>" "<=" ">=" "==" "!=" "(" ")"
"," ":" ";" "." "..." "[" "]" "{" "}" "&" "*" "/" "+" "-" "~" "!"
"%" "<" ">" "=" "^" "|" "?")
(:documentation "This package exports the token-kinds of the C11 terminal symbols."))
(defpackage "COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT"
......@@ -71,11 +82,12 @@
"COM.INFORMATIMAGO.COMMON-LISP.PARSER.SCANNER"
"COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP"
"COM.INFORMATIMAGO.LANGUAGES.C11.TOKENS"
"COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT")
"COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT"
"COM.INFORMATIMAGO.LANGUAGES.CPP")
(:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP"
"SPLIT-STRING")
(:export "C11-SCANNER"
"COMPUTE-TOKEN-KIND"))
"UPGRADE-C11-TOKEN"))
(defpackage "COM.INFORMATIMAGO.LANGUAGES.YACC.PARSER"
(:use "COMMON-LISP"
......@@ -99,7 +111,8 @@ returning a yacc:defgrammar form.
(defpackage "COM.INFORMATIMAGO.LANGUAGES.C11.PARSER"
(:use "CL-STEPPER"; "COMMON-LISP"
(:use ;; "CL-STEPPER"
"COMMON-LISP"
"COM.INFORMATIMAGO.RDP"
"COM.INFORMATIMAGO.COMMON-LISP.CESARUM.LIST"
"COM.INFORMATIMAGO.COMMON-LISP.CESARUM.STREAM"
......@@ -110,7 +123,8 @@ returning a yacc:defgrammar form.
"COM.INFORMATIMAGO.LANGUAGES.CPP"
"COM.INFORMATIMAGO.TOOLS.READER-MACRO"
"COM.INFORMATIMAGO.LANGUAGES.C11.TOKENS"
"COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT")
"COM.INFORMATIMAGO.LANGUAGES.C11.CONTEXT"
"COM.INFORMATIMAGO.LANGUAGES.C11.SCANNER")
(:shadowing-import-from "COM.INFORMATIMAGO.COMMON-LISP.REGEXP.REGEXP"
"SPLIT-STRING")
(:export "C11-PARSER"))
......
......@@ -6,23 +6,22 @@
;; (untrace compute-token-kind)
;; 7 seconds.
(defparameter *tc*
(let ((tokens (reduce (function append)
(defparameter *tc* (reduce (function append)
(reverse (com.informatimago.languages.cpp::context-output-lines
(let ((*identifier-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C"))))
(cpp-e "/Users/pjb/src/public/lisp/languages/cpp/tests/emacs.c"
:trace-includes t
:defines '("__GNUC__" "4" "__STDC__" "1" "__x86_64__" "1")
:defines '(;; "__GNUC__" "4"
"__STDC__" "1"
"__x86_64__" "1")
:includes '("/Users/pjb/src/macosx/emacs-24.5/src/")
:include-bracket-directories '("/Users/pjb/src/macosx/emacs-24.5/src/"
"/Users/pjb/src/macosx/emacs-24.5/lib/"
"/Users/pjb/src/macosx/gcc-4.9.2/gcc/ginclude/"
"/usr/include/")
:write-processed-lines nil))))
:initial-value '())))
(dolist (token tokens tokens)
(setf (token-kind token) (com.informatimago.languages.c11.scanner:compute-token-kind token)))))
:initial-value '()))
(with-open-file (out "p.lisp" :direction :output :if-exists :supersede :if-does-not-exist :create)
......@@ -32,17 +31,38 @@
(pprint grammar out))
(dolist (form rest)
(pprint form out))))
(load "p.lisp")
(defvar *scanner* nil)
(defun test/parse-stream (tokens)
(defun test/parse-stream (&optional tokens)
(declare (stepper disable))
(let ((*scanner* (make-instance 'pre-scanned-scanner :tokens tokens))
(*context* (make-instance 'context)))
(loop
:until (scanner-end-of-source-p *scanner*)
:collect (handler-bind ((parser-end-of-source-not-reached #'continue))
(parse-c11 *scanner*)))))
(let ((tokens (or tokens
(reduce (function append)
(reverse (com.informatimago.languages.cpp::context-output-lines
(let ((*identifier-package*
(load-time-value (find-package "COM.INFORMATIMAGO.LANGUAGES.C11.C"))))
(cpp-e "/Users/pjb/src/public/lisp/languages/cpp/tests/emacs.c"
:trace-includes t
:defines '(;; "__GNUC__" "4"
"__STDC__" "1"
"__x86_64__" "1")
:includes '("/Users/pjb/src/macosx/emacs-24.5/src/")
:include-bracket-directories '("/Users/pjb/src/macosx/emacs-24.5/src/"
"/Users/pjb/src/macosx/emacs-24.5/lib/"
"/Users/pjb/src/macosx/gcc-4.9.2/gcc/ginclude/"
"/usr/include/")
:write-processed-lines nil))))
:initial-value '()))))
(setf *tc* tokens)
(let ((*scanner* (make-instance 'pre-scanned-scanner :tokens tokens))
(*context* (make-instance 'context)))
(loop
:until (scanner-end-of-source-p *scanner*)
:collect (handler-bind ((parser-end-of-source-not-reached #'continue))
(parse-c11 *scanner*))))))
(step (test/parse-stream *tc*) :trace)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment