зеркало из https://github.com/mozilla/pjs.git
Added canonical LR(1) grammars and grammar cleaning
This commit is contained in:
Родитель
09fd8db28a
Коммит
2299109d45
|
@ -2886,7 +2886,7 @@
|
|||
|
||||
(defstruct preprocessor-state
|
||||
(kind nil :type (member nil :grammar :lexer)) ;The kind of grammar being accumulated or nil if none
|
||||
(kind2 nil :type (member nil :lalr-1 :lr-1)) ;The kind of parser
|
||||
(kind2 nil :type (member nil :lalr-1 :lr-1 :canonical-lr-1)) ;The kind of parser
|
||||
(name nil :type symbol) ;Name of the grammar being accumulated or nil if none
|
||||
(parametrization nil :type (or null grammar-parametrization)) ;Parametrization of the grammar being accumulated or nil if none
|
||||
(start-symbol nil :type symbol) ;Start symbol of the grammar being accumulated or nil if none
|
||||
|
|
|
@ -700,12 +700,19 @@
|
|||
(:copier nil)
|
||||
(:predicate state?))
|
||||
(number nil :type integer :read-only t) ;Serial number of the state
|
||||
(kernel nil :type list :read-only t) ;List of kernel items in order of increasing item-number values
|
||||
(laitems nil :type list :read-only t) ;List of laitems (topologically sorted by the propagates relation when parsing LR(1))
|
||||
(transitions nil :type list) ;List of (terminal . transition)
|
||||
(kernel nil :type list) ;List of kernel items [list of (item . lookahead) for canonical LR(1)] in order of increasing item-number values
|
||||
(laitems nil :type list) ;List of laitems
|
||||
(transitions nil :type list) ;List of (terminal . transition). A null terminal indicates "all terminals".
|
||||
(gotos nil :type list)) ;List of (nonterminal . state)
|
||||
|
||||
|
||||
; Return the transition for the given terminal or nil if there is none.
|
||||
(defun state-transition (state terminal)
|
||||
(cdr
|
||||
(or (assoc terminal (state-transitions state) :test *grammar-symbol-=*)
|
||||
(assoc nil (state-transitions state) :test *grammar-symbol-=*))))
|
||||
|
||||
|
||||
; If all outgoing transitions from the state are the same reduction, return that
|
||||
; reduction's production; otherwise return nil.
|
||||
(defun forwarding-state-production (state)
|
||||
|
@ -746,7 +753,12 @@
|
|||
(let ((grammar (laitem-grammar (first laitems))))
|
||||
(labels
|
||||
((lhs-matches-some-kernel-item (lhs-nonterminal)
|
||||
(member lhs-nonterminal (state-kernel state) :test *grammar-symbol-=* :key #'item-lhs))
|
||||
(member lhs-nonterminal (state-kernel state)
|
||||
:test *grammar-symbol-=*
|
||||
:key #'(lambda (kernel-item)
|
||||
(when (consp kernel-item)
|
||||
(setq kernel-item (car kernel-item)))
|
||||
(item-lhs kernel-item))))
|
||||
(laitem-< (laitem1 laitem2)
|
||||
(let* ((item1 (laitem-item laitem1))
|
||||
(item2 (laitem-item laitem2))
|
||||
|
@ -797,7 +809,10 @@
|
|||
(pprint-exit-if-list-exhausted)
|
||||
(let ((transition-cons (pprint-pop)))
|
||||
(pprint-logical-block (stream nil)
|
||||
(pprint-fill stream (car transition-cons) nil)
|
||||
(let ((terminals (car transition-cons)))
|
||||
(if (equal terminals '(nil))
|
||||
(write-string "any" stream)
|
||||
(pprint-fill stream terminals nil)))
|
||||
(format stream " ~2I~_=> ")
|
||||
(print-transition (cdr transition-cons) stream))
|
||||
(format stream " ~:_")))))
|
||||
|
@ -1021,7 +1036,7 @@
|
|||
(general-productions nil :type hash-table :read-only t);Hash table of production-name -> general-production
|
||||
(n-productions nil :type integer :read-only t) ;Number of productions in the grammar
|
||||
;The following fields are used for the parser.
|
||||
(items-hash nil :type hash-table :read-only t) ;Hash table of (production . dot) -> item
|
||||
(items-hash nil :type (or null hash-table)) ;Hash table of (production . dot) -> item; nil for a cleaned grammar or a grammar without a parser
|
||||
(states nil :type list) ;List of LR(0) states (in order of state numbers)
|
||||
;The following fields are used for the action generator.
|
||||
(action-signatures nil :type (or null hash-table))) ;Hash table of grammar-symbol -> list of (action-symbol . type-or-type-expr)
|
||||
|
@ -1357,8 +1372,7 @@
|
|||
:parameter-trees (make-hash-table :test *grammar-symbol-=*)
|
||||
:max-production-length max-production-length
|
||||
:general-productions general-productions
|
||||
:n-productions production-number
|
||||
:items-hash (make-hash-table :test #'equal))))
|
||||
:n-productions production-number)))
|
||||
|
||||
;Compute the terminalsets in the terminal-terminalsets.
|
||||
(dotimes (n (length terminals))
|
||||
|
@ -1476,6 +1490,8 @@
|
|||
(pprint-newline :mandatory stream)
|
||||
(pprint-logical-block (stream (grammar-states grammar))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
(unless (grammar-items-hash grammar)
|
||||
(error "Can't print a cleaned grammar's states"))
|
||||
(format stream "States:")
|
||||
(pprint-indent :block 2 stream)
|
||||
(pprint-newline :mandatory stream)
|
||||
|
|
|
@ -680,7 +680,7 @@
|
|||
|
||||
|
||||
; Make a lexer and grammar from the given source.
|
||||
; kind should be either :lalr-1 or :lr-1.
|
||||
; kind should be :lalr-1, :lr-1, or :canonical-lr-1.
|
||||
; charclasses-source is a list of character classes, and
|
||||
; lexer-actions-source is a list of lexer-action bindings; see make-lexer.
|
||||
; start-symbol is the grammar's start symbol, and grammar-source is its source.
|
||||
|
|
|
@ -29,10 +29,14 @@
|
|||
; kernel-item-alist is a list of pairs (item . prev), where item is a kernel item
|
||||
; and prev is either nil or a laitem. kernel is a list of the kernel items in a canonical order.
|
||||
; Return a new state with the given list of kernel items and state number.
|
||||
; If update-propagates is true, for each non-null prev in kernel-item-alist, update
|
||||
; If mode is :lalr-1, for each non-null prev in kernel-item-alist, update
|
||||
; (laitem-propagates prev) to include the corresponding laitem in the new state. Do this anyway
|
||||
; for internal lookaheads, regardless of update-propagates.
|
||||
(defun make-state (grammar kernel kernel-item-alist update-propagates number initial-lookaheads)
|
||||
; for internal lookaheads, regardless of mode.
|
||||
;
|
||||
; If mode is :canonical-lr-1, kernel-item-alist is a list of pairs (item . lookaheads), where
|
||||
; lookaheads is a terminalset of lookaheads for that item. Use these lookaheads instead of
|
||||
; initial-lookaheads.
|
||||
(defun make-state (grammar kernel kernel-item-alist mode number initial-lookaheads)
|
||||
(let ((laitems nil)
|
||||
(laitems-hash (make-hash-table :test #'eq)))
|
||||
(labels
|
||||
|
@ -85,20 +89,20 @@
|
|||
(dolist (acons kernel-item-alist)
|
||||
(close-item (car acons)
|
||||
*empty-terminalset*
|
||||
initial-lookaheads
|
||||
(and update-propagates (cdr acons))
|
||||
(if (eq mode :canonical-lr-1) (cdr acons) initial-lookaheads)
|
||||
(and (eq mode :lalr-1) (cdr acons))
|
||||
*full-terminalset*))
|
||||
(allocate-state number kernel (nreverse laitems)))))
|
||||
|
||||
|
||||
; f is a function that takes three arguments:
|
||||
; a grammar symbol;
|
||||
; a list of kernel items in order of increasing item number;
|
||||
; a list of kernel items in order of increasing item number [list of (item . lookahead) when mode is :canonical-lr-1];
|
||||
; a list of pairs (item . prev), where item is a kernel item and prev is a laitem.
|
||||
; For each possible symbol X that can be shifted while in the given state S, call
|
||||
; f giving it S and the list of items that constitute the kernel of that shift's destination
|
||||
; state. The prev's are the sources of the corresponding shifted items.
|
||||
(defun state-each-shift-item-alist (f state)
|
||||
(defun state-each-shift-item-alist (f state mode)
|
||||
(let ((shift-symbols-hash (make-hash-table :test *grammar-symbol-=*)))
|
||||
(dolist (source-laitem (state-laitems state))
|
||||
(let* ((source-item (laitem-item source-laitem))
|
||||
|
@ -108,8 +112,15 @@
|
|||
(gethash shift-symbol shift-symbols-hash)))))
|
||||
;Use dolist/gethash instead of maphash to make state assignments deterministic.
|
||||
(dolist (shift-symbol (sorted-hash-table-keys shift-symbols-hash))
|
||||
(let ((kernel-item-alist (gethash shift-symbol shift-symbols-hash)))
|
||||
(funcall f shift-symbol (sort (mapcar #'car kernel-item-alist) #'< :key #'item-number) kernel-item-alist)))))
|
||||
(let* ((kernel-item-alist (gethash shift-symbol shift-symbols-hash))
|
||||
(kernel (if (eq mode :canonical-lr-1)
|
||||
(sort (mapcar #'(lambda (acons)
|
||||
(cons (car acons) (laitem-lookaheads (cdr acons))))
|
||||
kernel-item-alist)
|
||||
#'<
|
||||
:key #'(lambda (acons) (item-number (car acons))))
|
||||
(sort (mapcar #'car kernel-item-alist) #'< :key #'item-number))))
|
||||
(funcall f shift-symbol kernel kernel-item-alist)))))
|
||||
|
||||
|
||||
; f is a function that takes a terminal variant as an argument.
|
||||
|
@ -138,6 +149,58 @@
|
|||
(error "Internal parser error"))))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; CANONICAL LR(1)
|
||||
;;;
|
||||
;;; Canonical LR(1) is accepts the same set of languages as LR(1) except that it produces vastly larger,
|
||||
;;; unoptimizied state tables. The only advantage to using Canonical LR(1) instead of LR(1) is that
|
||||
;;; a Canonical LR(1) parser will not make any reductions from an error state, whereas a LR(1) or LALR(1)
|
||||
;;; parser might make reductions (but not shifts). In other words, a Canonical LR(1) parser's shift and
|
||||
;;; reduce tables are fully accurate rather than conservative approximations based on merged states.
|
||||
|
||||
|
||||
; Make all states in the grammar and return the initial state.
|
||||
; Initialize the grammar's list of states.
|
||||
; Initialize the states' gotos lists.
|
||||
; Initialize the states' shift (but not reduce or accept) transitions in the transitions lists.
|
||||
(defun add-all-canonical-lr-states (grammar)
|
||||
(let* ((initial-item (make-item grammar (grammar-start-production grammar) 0))
|
||||
(lr-states-hash (make-hash-table :test #'equal)) ;canonical kernel -> state
|
||||
(initial-kernel (list (cons initial-item (make-terminalset grammar *end-marker*))))
|
||||
(initial-state (make-state grammar initial-kernel initial-kernel :canonical-lr-1 0 nil))
|
||||
(states (list initial-state))
|
||||
(next-state-number 1))
|
||||
(setf (gethash initial-kernel lr-states-hash) initial-state)
|
||||
(do ((source-states (list initial-state)))
|
||||
((endp source-states))
|
||||
(let ((source-state (pop source-states)))
|
||||
;Propagate the source state's internal lookaheads and then erase the propagates chains.
|
||||
(propagate-internal-lookaheads source-state)
|
||||
(dolist (laitem (state-laitems source-state))
|
||||
(setf (laitem-propagates laitem) nil))
|
||||
|
||||
(state-each-shift-item-alist
|
||||
#'(lambda (shift-symbol kernel kernel-item-alist)
|
||||
(let ((destination-state (gethash kernel lr-states-hash)))
|
||||
(unless destination-state
|
||||
(setq destination-state (make-state grammar kernel kernel :canonical-lr-1 next-state-number nil))
|
||||
(setf (gethash kernel lr-states-hash) destination-state)
|
||||
(incf next-state-number)
|
||||
(push destination-state states)
|
||||
(push destination-state source-states))
|
||||
(if (nonterminal? shift-symbol)
|
||||
(push (cons shift-symbol destination-state)
|
||||
(state-gotos source-state))
|
||||
(each-shift-symbol-variant
|
||||
#'(lambda (shift-symbol-variant)
|
||||
(push (cons shift-symbol-variant (make-shift-transition destination-state))
|
||||
(state-transitions source-state)))
|
||||
grammar shift-symbol kernel-item-alist))))
|
||||
source-state :canonical-lr-1)))
|
||||
(setf (grammar-states grammar) (nreverse states))
|
||||
initial-state))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; LR(1)
|
||||
|
||||
|
@ -182,12 +245,19 @@
|
|||
|
||||
; Propagate all lookaheads in the state.
|
||||
(defun propagate-internal-lookaheads (state)
|
||||
(dolist (src-laitem (state-laitems state))
|
||||
(let ((src-lookaheads (laitem-lookaheads src-laitem)))
|
||||
(dolist (propagation (laitem-propagates src-laitem))
|
||||
(let ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation)))
|
||||
(terminalset-union-f (laitem-lookaheads dst-laitem) (terminalset-intersection src-lookaheads mask)))))))
|
||||
(do ((changed t))
|
||||
((not changed))
|
||||
(setq changed nil)
|
||||
(dolist (src-laitem (state-laitems state))
|
||||
(let ((src-lookaheads (laitem-lookaheads src-laitem)))
|
||||
(dolist (propagation (laitem-propagates src-laitem))
|
||||
(let* ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation))
|
||||
(old-dst-lookaheads (laitem-lookaheads dst-laitem))
|
||||
(new-dst-lookaheads (terminalset-union old-dst-lookaheads (terminalset-intersection src-lookaheads mask))))
|
||||
(setf (laitem-lookaheads dst-laitem) new-dst-lookaheads)
|
||||
(unless (terminalset-= old-dst-lookaheads new-dst-lookaheads)
|
||||
(setq changed t))))))))
|
||||
|
||||
|
||||
; Propagate all lookaheads in kernel-item-alist, which must target destination-state.
|
||||
|
@ -202,14 +272,13 @@
|
|||
|
||||
; Make all states in the grammar and return the initial state.
|
||||
; Initialize the grammar's list of states.
|
||||
; Set up the laitems' propagate lists but do not propagate lookaheads yet.
|
||||
; Initialize the states' gotos lists.
|
||||
; Initialize the states' shift (but not reduce or accept) transitions in the transitions lists.
|
||||
(defun add-all-lr-states (grammar)
|
||||
(let* ((initial-item (make-item grammar (grammar-start-production grammar) 0))
|
||||
(lr-states-hash (make-hash-table :test #'equal)) ;kernel -> list of states with that kernel
|
||||
(initial-kernel (list initial-item))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) nil 0 (make-terminalset grammar *end-marker*)))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) :lr-1 0 (make-terminalset grammar *end-marker*)))
|
||||
(states (list initial-state))
|
||||
(next-state-number 1))
|
||||
(setf (gethash initial-kernel lr-states-hash) (list initial-state))
|
||||
|
@ -229,7 +298,7 @@
|
|||
possible-destination-states))
|
||||
(propagate-external-lookaheads kernel-item-alist destination-state dirty-states))
|
||||
(t
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist nil next-state-number *empty-terminalset*))
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist :lr-1 next-state-number *empty-terminalset*))
|
||||
(propagate-external-lookaheads kernel-item-alist destination-state dirty-states)
|
||||
(push destination-state (gethash kernel lr-states-hash))
|
||||
(incf next-state-number)
|
||||
|
@ -261,7 +330,7 @@
|
|||
(push (cons shift-symbol-variant (make-shift-transition destination-state))
|
||||
(state-transitions source-state)))
|
||||
grammar shift-symbol kernel-item-alist))))
|
||||
source-state))
|
||||
source-state :lr-1))
|
||||
(dolist (dirty-state (sort (hash-table-keys dirty-states) #'< :key #'state-number))
|
||||
(when (remhash dirty-state dirty-states)
|
||||
(propagate-internal-lookaheads dirty-state)
|
||||
|
@ -274,12 +343,12 @@
|
|||
(setf (cdr destination-binding) (update-destination-state destination-state kernel-item-alist)))
|
||||
(each-shift-symbol-variant
|
||||
#'(lambda (shift-symbol-variant)
|
||||
(let* ((destination-transition (cdr (assoc shift-symbol-variant (state-transitions dirty-state) :test *grammar-symbol-=*)))
|
||||
(let* ((destination-transition (state-transition dirty-state shift-symbol-variant))
|
||||
(destination-state (assert-non-null (transition-state destination-transition))))
|
||||
(setf (transition-state destination-transition)
|
||||
(update-destination-state destination-state kernel-item-alist))))
|
||||
grammar shift-symbol kernel-item-alist)))
|
||||
dirty-state))))))
|
||||
dirty-state :lr-1))))))
|
||||
(setf (grammar-states grammar) (nreverse states))
|
||||
initial-state))
|
||||
|
||||
|
@ -297,7 +366,7 @@
|
|||
(let* ((initial-item (make-item grammar (grammar-start-production grammar) 0))
|
||||
(lalr-states-hash (make-hash-table :test #'equal)) ;kernel -> state
|
||||
(initial-kernel (list initial-item))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) t 0 (make-terminalset grammar *end-marker*)))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) :lalr-1 0 (make-terminalset grammar *end-marker*)))
|
||||
(states (list initial-state))
|
||||
(next-state-number 1))
|
||||
(setf (gethash initial-kernel lalr-states-hash) initial-state)
|
||||
|
@ -311,7 +380,7 @@
|
|||
(dolist (acons kernel-item-alist)
|
||||
(laitem-add-propagation (cdr acons) (state-laitem destination-state (car acons)) *full-terminalset*))
|
||||
(progn
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist t next-state-number *empty-terminalset*))
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist :lalr-1 next-state-number *empty-terminalset*))
|
||||
(setf (gethash kernel lalr-states-hash) destination-state)
|
||||
(incf next-state-number)
|
||||
(push destination-state states)
|
||||
|
@ -324,7 +393,7 @@
|
|||
(push (cons shift-symbol-variant (make-shift-transition destination-state))
|
||||
(state-transitions source-state)))
|
||||
grammar shift-symbol kernel-item-alist))))
|
||||
source-state)))
|
||||
source-state :lalr-1)))
|
||||
(setf (grammar-states grammar) (nreverse states))
|
||||
initial-state))
|
||||
|
||||
|
@ -457,33 +526,59 @@
|
|||
(write-char #\newline stream))))
|
||||
|
||||
|
||||
; Remove the temporary item and laitem lists from the grammar's states. This reduces the grammar's lisp
|
||||
; heap usage but prevents it from being printed.
|
||||
(defun clean-grammar (grammar)
|
||||
(when (grammar-items-hash grammar)
|
||||
(setf (grammar-items-hash grammar) nil)
|
||||
(dolist (state (grammar-states grammar))
|
||||
(setf (state-kernel state) nil)
|
||||
(setf (state-laitems state) nil))))
|
||||
|
||||
|
||||
; Erase the existing parser, if any, for the given grammar.
|
||||
(defun clear-parser (grammar)
|
||||
(clrhash (grammar-items-hash grammar))
|
||||
(setf (grammar-items-hash grammar) nil)
|
||||
(setf (grammar-states grammar) nil))
|
||||
|
||||
|
||||
; Construct a LR or LALR parser in the given grammar. kind should be either :lalr-1 or :lr-1.
|
||||
; Construct a LR or LALR parser in the given grammar. kind should be :lalr-1, :lr-1, or :canonical-lr-1.
|
||||
; Return the grammar.
|
||||
(defun compile-parser (grammar kind)
|
||||
(clear-parser grammar)
|
||||
(setf (grammar-items-hash grammar) (make-hash-table :test #'equal))
|
||||
(ecase kind
|
||||
(:lalr-1
|
||||
(add-all-lalr-states grammar)
|
||||
(propagate-lalr-lookaheads grammar))
|
||||
(:lr-1
|
||||
(add-all-lr-states grammar)))
|
||||
(add-all-lr-states grammar))
|
||||
(:canonical-lr-1
|
||||
(add-all-canonical-lr-states grammar)))
|
||||
(finish-transitions grammar)
|
||||
(report-and-fix-ambiguities grammar *error-output*)
|
||||
grammar)
|
||||
|
||||
|
||||
; Make the grammar and compile its parser. kind should be either :lalr-1 or :lr-1.
|
||||
; Make the grammar and compile its parser. kind should be :lalr-1, :lr-1, or :canonical-lr-1.
|
||||
(defun make-and-compile-grammar (kind parametrization start-symbol grammar-source &rest grammar-options)
|
||||
(compile-parser (apply #'make-grammar parametrization start-symbol grammar-source grammar-options)
|
||||
kind))
|
||||
|
||||
|
||||
; Collapse states that have at most one possible reduction into forwarding states.
|
||||
; DON'T DO THIS ON GRAMMARS THAT HAVE CONSTRAINTS AT THE TAIL END OF A PRODUCTION.
|
||||
; Return the number of states optimized.
|
||||
(defun forward-parser-states (grammar)
|
||||
(let ((n-forwarded-states 0))
|
||||
(dolist (state (grammar-states grammar))
|
||||
(let ((production (forwarding-state-production state)))
|
||||
(when production
|
||||
(setf (state-transitions state) (list (cons nil (make-reduce-transition production))))
|
||||
(incf n-forwarded-states))))
|
||||
n-forwarded-states))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
|
||||
; Parse the input list of tokens to produce a parse tree.
|
||||
|
@ -501,7 +596,7 @@
|
|||
;input-rest contains the input tokens after the next token.
|
||||
(parse-step-1 (stack terminal token input-rest)
|
||||
(let* ((state (caar stack))
|
||||
(transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
|
||||
(transition (state-transition state terminal)))
|
||||
(if transition
|
||||
(case (transition-kind transition)
|
||||
(:shift (parse-step (acons (transition-state transition) token stack) input-rest))
|
||||
|
@ -648,51 +743,7 @@
|
|||
(defun action-parse (grammar token-terminal input &key trace)
|
||||
(labels
|
||||
(;Continue the parse with the given stacks and remainder of input.
|
||||
(parse-step (state-stack value-stack input)
|
||||
(if (endp input)
|
||||
(parse-step-1 state-stack value-stack *end-marker* nil nil)
|
||||
(let ((token (first input)))
|
||||
(parse-step-1 state-stack value-stack (funcall token-terminal token) token (rest input)))))
|
||||
|
||||
;Same as parse-step except that the next input terminal has been determined already.
|
||||
;input-rest contains the input tokens after the next token.
|
||||
(parse-step-1 (state-stack value-stack terminal token input-rest)
|
||||
(let* ((state (car state-stack))
|
||||
(transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
|
||||
(if transition
|
||||
(case (transition-kind transition)
|
||||
(:shift
|
||||
(dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar)))
|
||||
(push (funcall (cdr action-function-binding) token) value-stack))
|
||||
(parse-step (cons (transition-state transition) state-stack) value-stack input-rest))
|
||||
(:reduce
|
||||
(let* ((production (transition-production transition))
|
||||
(state-stack (nthcdr (production-rhs-length production) state-stack))
|
||||
(state (car state-stack))
|
||||
(dst-state (assert-non-null
|
||||
(cdr (assoc (production-lhs production) (state-gotos state) :test *grammar-symbol-=*))))
|
||||
(value-stack (funcall (production-evaluator production) value-stack)))
|
||||
(parse-step-1 (cons dst-state state-stack) value-stack terminal token input-rest)))
|
||||
(:accept (values (nreverse value-stack) (grammar-user-start-action-types grammar)))
|
||||
(t (error "Bad transition: ~S" transition)))
|
||||
(error "Parse error on ~S followed by ~S ..." token (ldiff input-rest (nthcdr 10 input-rest)))))))
|
||||
|
||||
(if trace
|
||||
(trace-action-parse grammar token-terminal input trace)
|
||||
(parse-step (list (grammar-start-state grammar)) nil input))))
|
||||
|
||||
|
||||
; Same as action-parse, but with tracing information
|
||||
; If trace is:
|
||||
; :code, print trace information, including action code
|
||||
; other print trace information
|
||||
; Return two values:
|
||||
; the list of action results;
|
||||
; the list of action results' types.
|
||||
(defun trace-action-parse (grammar token-terminal input trace)
|
||||
(labels
|
||||
(;Continue the parse with the given stacks and remainder of input.
|
||||
;type-stack contains the types of corresponding value-stack entries.
|
||||
;When trace is non-null, type-stack contains the types of corresponding value-stack entries.
|
||||
(parse-step (state-stack value-stack type-stack input)
|
||||
(if (endp input)
|
||||
(parse-step-1 state-stack value-stack type-stack *end-marker* nil nil)
|
||||
|
@ -703,38 +754,50 @@
|
|||
;input-rest contains the input tokens after the next token.
|
||||
(parse-step-1 (state-stack value-stack type-stack terminal token input-rest)
|
||||
(let* ((state (car state-stack))
|
||||
(transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
|
||||
(format *trace-output* "S~D: ~@_" (state-number state))
|
||||
(print-values (reverse value-stack) (reverse type-stack) *trace-output*)
|
||||
(pprint-newline :mandatory *trace-output*)
|
||||
(transition (state-transition state terminal)))
|
||||
(when trace
|
||||
(format *trace-output* "S~D: ~@_" (state-number state))
|
||||
(print-values (reverse value-stack) (reverse type-stack) *trace-output*)
|
||||
(pprint-newline :mandatory *trace-output*))
|
||||
(if transition
|
||||
(case (transition-kind transition)
|
||||
(:shift
|
||||
(format *trace-output* " shift ~W~:@_" terminal)
|
||||
(when trace
|
||||
(format *trace-output* " shift ~W~:@_" terminal)
|
||||
(dolist (action-signature (grammar-symbol-signature grammar terminal))
|
||||
(push (cdr action-signature) type-stack)))
|
||||
(dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar)))
|
||||
(push (funcall (cdr action-function-binding) token) value-stack))
|
||||
(dolist (action-signature (grammar-symbol-signature grammar terminal))
|
||||
(push (cdr action-signature) type-stack))
|
||||
(parse-step (cons (transition-state transition) state-stack) value-stack type-stack input-rest))
|
||||
|
||||
(:reduce
|
||||
(let ((production (transition-production transition)))
|
||||
(write-string " reduce " *trace-output*)
|
||||
(if (eq trace :code)
|
||||
(write production :stream *trace-output* :pretty t)
|
||||
(print-production production *trace-output*))
|
||||
(pprint-newline :mandatory *trace-output*)
|
||||
(when trace
|
||||
(write-string " reduce " *trace-output*)
|
||||
(if (eq trace :code)
|
||||
(write production :stream *trace-output* :pretty t)
|
||||
(print-production production *trace-output*))
|
||||
(pprint-newline :mandatory *trace-output*))
|
||||
(let* ((state-stack (nthcdr (production-rhs-length production) state-stack))
|
||||
(state (car state-stack))
|
||||
(dst-state (assert-non-null
|
||||
(cdr (assoc (production-lhs production) (state-gotos state) :test *grammar-symbol-=*))))
|
||||
(value-stack (funcall (production-evaluator production) value-stack))
|
||||
(type-stack (nthcdr (production-n-action-args production) type-stack)))
|
||||
(dolist (action-signature (grammar-symbol-signature grammar (production-lhs production)))
|
||||
(push (cdr action-signature) type-stack))
|
||||
(value-stack (funcall (production-evaluator production) value-stack)))
|
||||
(when trace
|
||||
(setq type-stack (nthcdr (production-n-action-args production) type-stack))
|
||||
(dolist (action-signature (grammar-symbol-signature grammar (production-lhs production)))
|
||||
(push (cdr action-signature) type-stack)))
|
||||
(parse-step-1 (cons dst-state state-stack) value-stack type-stack terminal token input-rest))))
|
||||
|
||||
(:accept
|
||||
(format *trace-output* " accept~:@_")
|
||||
(values (nreverse value-stack) (nreverse type-stack)))
|
||||
(when trace
|
||||
(format *trace-output* " accept~:@_"))
|
||||
(values
|
||||
(nreverse value-stack)
|
||||
(if trace
|
||||
(nreverse type-stack)
|
||||
(grammar-user-start-action-types grammar))))
|
||||
|
||||
(t (error "Bad transition: ~S" transition)))
|
||||
(error "Parse error on ~S followed by ~S ..." token (ldiff input-rest (nthcdr 10 input-rest)))))))
|
||||
|
||||
|
|
|
@ -2886,7 +2886,7 @@
|
|||
|
||||
(defstruct preprocessor-state
|
||||
(kind nil :type (member nil :grammar :lexer)) ;The kind of grammar being accumulated or nil if none
|
||||
(kind2 nil :type (member nil :lalr-1 :lr-1)) ;The kind of parser
|
||||
(kind2 nil :type (member nil :lalr-1 :lr-1 :canonical-lr-1)) ;The kind of parser
|
||||
(name nil :type symbol) ;Name of the grammar being accumulated or nil if none
|
||||
(parametrization nil :type (or null grammar-parametrization)) ;Parametrization of the grammar being accumulated or nil if none
|
||||
(start-symbol nil :type symbol) ;Start symbol of the grammar being accumulated or nil if none
|
||||
|
|
|
@ -700,12 +700,19 @@
|
|||
(:copier nil)
|
||||
(:predicate state?))
|
||||
(number nil :type integer :read-only t) ;Serial number of the state
|
||||
(kernel nil :type list :read-only t) ;List of kernel items in order of increasing item-number values
|
||||
(laitems nil :type list :read-only t) ;List of laitems (topologically sorted by the propagates relation when parsing LR(1))
|
||||
(transitions nil :type list) ;List of (terminal . transition)
|
||||
(kernel nil :type list) ;List of kernel items [list of (item . lookahead) for canonical LR(1)] in order of increasing item-number values
|
||||
(laitems nil :type list) ;List of laitems
|
||||
(transitions nil :type list) ;List of (terminal . transition). A null terminal indicates "all terminals".
|
||||
(gotos nil :type list)) ;List of (nonterminal . state)
|
||||
|
||||
|
||||
; Return the transition for the given terminal or nil if there is none.
|
||||
(defun state-transition (state terminal)
|
||||
(cdr
|
||||
(or (assoc terminal (state-transitions state) :test *grammar-symbol-=*)
|
||||
(assoc nil (state-transitions state) :test *grammar-symbol-=*))))
|
||||
|
||||
|
||||
; If all outgoing transitions from the state are the same reduction, return that
|
||||
; reduction's production; otherwise return nil.
|
||||
(defun forwarding-state-production (state)
|
||||
|
@ -746,7 +753,12 @@
|
|||
(let ((grammar (laitem-grammar (first laitems))))
|
||||
(labels
|
||||
((lhs-matches-some-kernel-item (lhs-nonterminal)
|
||||
(member lhs-nonterminal (state-kernel state) :test *grammar-symbol-=* :key #'item-lhs))
|
||||
(member lhs-nonterminal (state-kernel state)
|
||||
:test *grammar-symbol-=*
|
||||
:key #'(lambda (kernel-item)
|
||||
(when (consp kernel-item)
|
||||
(setq kernel-item (car kernel-item)))
|
||||
(item-lhs kernel-item))))
|
||||
(laitem-< (laitem1 laitem2)
|
||||
(let* ((item1 (laitem-item laitem1))
|
||||
(item2 (laitem-item laitem2))
|
||||
|
@ -797,7 +809,10 @@
|
|||
(pprint-exit-if-list-exhausted)
|
||||
(let ((transition-cons (pprint-pop)))
|
||||
(pprint-logical-block (stream nil)
|
||||
(pprint-fill stream (car transition-cons) nil)
|
||||
(let ((terminals (car transition-cons)))
|
||||
(if (equal terminals '(nil))
|
||||
(write-string "any" stream)
|
||||
(pprint-fill stream terminals nil)))
|
||||
(format stream " ~2I~_=> ")
|
||||
(print-transition (cdr transition-cons) stream))
|
||||
(format stream " ~:_")))))
|
||||
|
@ -1021,7 +1036,7 @@
|
|||
(general-productions nil :type hash-table :read-only t);Hash table of production-name -> general-production
|
||||
(n-productions nil :type integer :read-only t) ;Number of productions in the grammar
|
||||
;The following fields are used for the parser.
|
||||
(items-hash nil :type hash-table :read-only t) ;Hash table of (production . dot) -> item
|
||||
(items-hash nil :type (or null hash-table)) ;Hash table of (production . dot) -> item; nil for a cleaned grammar or a grammar without a parser
|
||||
(states nil :type list) ;List of LR(0) states (in order of state numbers)
|
||||
;The following fields are used for the action generator.
|
||||
(action-signatures nil :type (or null hash-table))) ;Hash table of grammar-symbol -> list of (action-symbol . type-or-type-expr)
|
||||
|
@ -1357,8 +1372,7 @@
|
|||
:parameter-trees (make-hash-table :test *grammar-symbol-=*)
|
||||
:max-production-length max-production-length
|
||||
:general-productions general-productions
|
||||
:n-productions production-number
|
||||
:items-hash (make-hash-table :test #'equal))))
|
||||
:n-productions production-number)))
|
||||
|
||||
;Compute the terminalsets in the terminal-terminalsets.
|
||||
(dotimes (n (length terminals))
|
||||
|
@ -1476,6 +1490,8 @@
|
|||
(pprint-newline :mandatory stream)
|
||||
(pprint-logical-block (stream (grammar-states grammar))
|
||||
(pprint-exit-if-list-exhausted)
|
||||
(unless (grammar-items-hash grammar)
|
||||
(error "Can't print a cleaned grammar's states"))
|
||||
(format stream "States:")
|
||||
(pprint-indent :block 2 stream)
|
||||
(pprint-newline :mandatory stream)
|
||||
|
|
|
@ -680,7 +680,7 @@
|
|||
|
||||
|
||||
; Make a lexer and grammar from the given source.
|
||||
; kind should be either :lalr-1 or :lr-1.
|
||||
; kind should be :lalr-1, :lr-1, or :canonical-lr-1.
|
||||
; charclasses-source is a list of character classes, and
|
||||
; lexer-actions-source is a list of lexer-action bindings; see make-lexer.
|
||||
; start-symbol is the grammar's start symbol, and grammar-source is its source.
|
||||
|
|
|
@ -29,10 +29,14 @@
|
|||
; kernel-item-alist is a list of pairs (item . prev), where item is a kernel item
|
||||
; and prev is either nil or a laitem. kernel is a list of the kernel items in a canonical order.
|
||||
; Return a new state with the given list of kernel items and state number.
|
||||
; If update-propagates is true, for each non-null prev in kernel-item-alist, update
|
||||
; If mode is :lalr-1, for each non-null prev in kernel-item-alist, update
|
||||
; (laitem-propagates prev) to include the corresponding laitem in the new state. Do this anyway
|
||||
; for internal lookaheads, regardless of update-propagates.
|
||||
(defun make-state (grammar kernel kernel-item-alist update-propagates number initial-lookaheads)
|
||||
; for internal lookaheads, regardless of mode.
|
||||
;
|
||||
; If mode is :canonical-lr-1, kernel-item-alist is a list of pairs (item . lookaheads), where
|
||||
; lookaheads is a terminalset of lookaheads for that item. Use these lookaheads instead of
|
||||
; initial-lookaheads.
|
||||
(defun make-state (grammar kernel kernel-item-alist mode number initial-lookaheads)
|
||||
(let ((laitems nil)
|
||||
(laitems-hash (make-hash-table :test #'eq)))
|
||||
(labels
|
||||
|
@ -85,20 +89,20 @@
|
|||
(dolist (acons kernel-item-alist)
|
||||
(close-item (car acons)
|
||||
*empty-terminalset*
|
||||
initial-lookaheads
|
||||
(and update-propagates (cdr acons))
|
||||
(if (eq mode :canonical-lr-1) (cdr acons) initial-lookaheads)
|
||||
(and (eq mode :lalr-1) (cdr acons))
|
||||
*full-terminalset*))
|
||||
(allocate-state number kernel (nreverse laitems)))))
|
||||
|
||||
|
||||
; f is a function that takes three arguments:
|
||||
; a grammar symbol;
|
||||
; a list of kernel items in order of increasing item number;
|
||||
; a list of kernel items in order of increasing item number [list of (item . lookahead) when mode is :canonical-lr-1];
|
||||
; a list of pairs (item . prev), where item is a kernel item and prev is a laitem.
|
||||
; For each possible symbol X that can be shifted while in the given state S, call
|
||||
; f giving it S and the list of items that constitute the kernel of that shift's destination
|
||||
; state. The prev's are the sources of the corresponding shifted items.
|
||||
(defun state-each-shift-item-alist (f state)
|
||||
(defun state-each-shift-item-alist (f state mode)
|
||||
(let ((shift-symbols-hash (make-hash-table :test *grammar-symbol-=*)))
|
||||
(dolist (source-laitem (state-laitems state))
|
||||
(let* ((source-item (laitem-item source-laitem))
|
||||
|
@ -108,8 +112,15 @@
|
|||
(gethash shift-symbol shift-symbols-hash)))))
|
||||
;Use dolist/gethash instead of maphash to make state assignments deterministic.
|
||||
(dolist (shift-symbol (sorted-hash-table-keys shift-symbols-hash))
|
||||
(let ((kernel-item-alist (gethash shift-symbol shift-symbols-hash)))
|
||||
(funcall f shift-symbol (sort (mapcar #'car kernel-item-alist) #'< :key #'item-number) kernel-item-alist)))))
|
||||
(let* ((kernel-item-alist (gethash shift-symbol shift-symbols-hash))
|
||||
(kernel (if (eq mode :canonical-lr-1)
|
||||
(sort (mapcar #'(lambda (acons)
|
||||
(cons (car acons) (laitem-lookaheads (cdr acons))))
|
||||
kernel-item-alist)
|
||||
#'<
|
||||
:key #'(lambda (acons) (item-number (car acons))))
|
||||
(sort (mapcar #'car kernel-item-alist) #'< :key #'item-number))))
|
||||
(funcall f shift-symbol kernel kernel-item-alist)))))
|
||||
|
||||
|
||||
; f is a function that takes a terminal variant as an argument.
|
||||
|
@ -138,6 +149,58 @@
|
|||
(error "Internal parser error"))))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; CANONICAL LR(1)
|
||||
;;;
|
||||
;;; Canonical LR(1) is accepts the same set of languages as LR(1) except that it produces vastly larger,
|
||||
;;; unoptimizied state tables. The only advantage to using Canonical LR(1) instead of LR(1) is that
|
||||
;;; a Canonical LR(1) parser will not make any reductions from an error state, whereas a LR(1) or LALR(1)
|
||||
;;; parser might make reductions (but not shifts). In other words, a Canonical LR(1) parser's shift and
|
||||
;;; reduce tables are fully accurate rather than conservative approximations based on merged states.
|
||||
|
||||
|
||||
; Make all states in the grammar and return the initial state.
|
||||
; Initialize the grammar's list of states.
|
||||
; Initialize the states' gotos lists.
|
||||
; Initialize the states' shift (but not reduce or accept) transitions in the transitions lists.
|
||||
(defun add-all-canonical-lr-states (grammar)
|
||||
(let* ((initial-item (make-item grammar (grammar-start-production grammar) 0))
|
||||
(lr-states-hash (make-hash-table :test #'equal)) ;canonical kernel -> state
|
||||
(initial-kernel (list (cons initial-item (make-terminalset grammar *end-marker*))))
|
||||
(initial-state (make-state grammar initial-kernel initial-kernel :canonical-lr-1 0 nil))
|
||||
(states (list initial-state))
|
||||
(next-state-number 1))
|
||||
(setf (gethash initial-kernel lr-states-hash) initial-state)
|
||||
(do ((source-states (list initial-state)))
|
||||
((endp source-states))
|
||||
(let ((source-state (pop source-states)))
|
||||
;Propagate the source state's internal lookaheads and then erase the propagates chains.
|
||||
(propagate-internal-lookaheads source-state)
|
||||
(dolist (laitem (state-laitems source-state))
|
||||
(setf (laitem-propagates laitem) nil))
|
||||
|
||||
(state-each-shift-item-alist
|
||||
#'(lambda (shift-symbol kernel kernel-item-alist)
|
||||
(let ((destination-state (gethash kernel lr-states-hash)))
|
||||
(unless destination-state
|
||||
(setq destination-state (make-state grammar kernel kernel :canonical-lr-1 next-state-number nil))
|
||||
(setf (gethash kernel lr-states-hash) destination-state)
|
||||
(incf next-state-number)
|
||||
(push destination-state states)
|
||||
(push destination-state source-states))
|
||||
(if (nonterminal? shift-symbol)
|
||||
(push (cons shift-symbol destination-state)
|
||||
(state-gotos source-state))
|
||||
(each-shift-symbol-variant
|
||||
#'(lambda (shift-symbol-variant)
|
||||
(push (cons shift-symbol-variant (make-shift-transition destination-state))
|
||||
(state-transitions source-state)))
|
||||
grammar shift-symbol kernel-item-alist))))
|
||||
source-state :canonical-lr-1)))
|
||||
(setf (grammar-states grammar) (nreverse states))
|
||||
initial-state))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
;;; LR(1)
|
||||
|
||||
|
@ -182,12 +245,19 @@
|
|||
|
||||
; Propagate all lookaheads in the state.
|
||||
(defun propagate-internal-lookaheads (state)
|
||||
(dolist (src-laitem (state-laitems state))
|
||||
(let ((src-lookaheads (laitem-lookaheads src-laitem)))
|
||||
(dolist (propagation (laitem-propagates src-laitem))
|
||||
(let ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation)))
|
||||
(terminalset-union-f (laitem-lookaheads dst-laitem) (terminalset-intersection src-lookaheads mask)))))))
|
||||
(do ((changed t))
|
||||
((not changed))
|
||||
(setq changed nil)
|
||||
(dolist (src-laitem (state-laitems state))
|
||||
(let ((src-lookaheads (laitem-lookaheads src-laitem)))
|
||||
(dolist (propagation (laitem-propagates src-laitem))
|
||||
(let* ((dst-laitem (car propagation))
|
||||
(mask (cdr propagation))
|
||||
(old-dst-lookaheads (laitem-lookaheads dst-laitem))
|
||||
(new-dst-lookaheads (terminalset-union old-dst-lookaheads (terminalset-intersection src-lookaheads mask))))
|
||||
(setf (laitem-lookaheads dst-laitem) new-dst-lookaheads)
|
||||
(unless (terminalset-= old-dst-lookaheads new-dst-lookaheads)
|
||||
(setq changed t))))))))
|
||||
|
||||
|
||||
; Propagate all lookaheads in kernel-item-alist, which must target destination-state.
|
||||
|
@ -202,14 +272,13 @@
|
|||
|
||||
; Make all states in the grammar and return the initial state.
|
||||
; Initialize the grammar's list of states.
|
||||
; Set up the laitems' propagate lists but do not propagate lookaheads yet.
|
||||
; Initialize the states' gotos lists.
|
||||
; Initialize the states' shift (but not reduce or accept) transitions in the transitions lists.
|
||||
(defun add-all-lr-states (grammar)
|
||||
(let* ((initial-item (make-item grammar (grammar-start-production grammar) 0))
|
||||
(lr-states-hash (make-hash-table :test #'equal)) ;kernel -> list of states with that kernel
|
||||
(initial-kernel (list initial-item))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) nil 0 (make-terminalset grammar *end-marker*)))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) :lr-1 0 (make-terminalset grammar *end-marker*)))
|
||||
(states (list initial-state))
|
||||
(next-state-number 1))
|
||||
(setf (gethash initial-kernel lr-states-hash) (list initial-state))
|
||||
|
@ -229,7 +298,7 @@
|
|||
possible-destination-states))
|
||||
(propagate-external-lookaheads kernel-item-alist destination-state dirty-states))
|
||||
(t
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist nil next-state-number *empty-terminalset*))
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist :lr-1 next-state-number *empty-terminalset*))
|
||||
(propagate-external-lookaheads kernel-item-alist destination-state dirty-states)
|
||||
(push destination-state (gethash kernel lr-states-hash))
|
||||
(incf next-state-number)
|
||||
|
@ -261,7 +330,7 @@
|
|||
(push (cons shift-symbol-variant (make-shift-transition destination-state))
|
||||
(state-transitions source-state)))
|
||||
grammar shift-symbol kernel-item-alist))))
|
||||
source-state))
|
||||
source-state :lr-1))
|
||||
(dolist (dirty-state (sort (hash-table-keys dirty-states) #'< :key #'state-number))
|
||||
(when (remhash dirty-state dirty-states)
|
||||
(propagate-internal-lookaheads dirty-state)
|
||||
|
@ -274,12 +343,12 @@
|
|||
(setf (cdr destination-binding) (update-destination-state destination-state kernel-item-alist)))
|
||||
(each-shift-symbol-variant
|
||||
#'(lambda (shift-symbol-variant)
|
||||
(let* ((destination-transition (cdr (assoc shift-symbol-variant (state-transitions dirty-state) :test *grammar-symbol-=*)))
|
||||
(let* ((destination-transition (state-transition dirty-state shift-symbol-variant))
|
||||
(destination-state (assert-non-null (transition-state destination-transition))))
|
||||
(setf (transition-state destination-transition)
|
||||
(update-destination-state destination-state kernel-item-alist))))
|
||||
grammar shift-symbol kernel-item-alist)))
|
||||
dirty-state))))))
|
||||
dirty-state :lr-1))))))
|
||||
(setf (grammar-states grammar) (nreverse states))
|
||||
initial-state))
|
||||
|
||||
|
@ -297,7 +366,7 @@
|
|||
(let* ((initial-item (make-item grammar (grammar-start-production grammar) 0))
|
||||
(lalr-states-hash (make-hash-table :test #'equal)) ;kernel -> state
|
||||
(initial-kernel (list initial-item))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) t 0 (make-terminalset grammar *end-marker*)))
|
||||
(initial-state (make-state grammar initial-kernel (list (cons initial-item nil)) :lalr-1 0 (make-terminalset grammar *end-marker*)))
|
||||
(states (list initial-state))
|
||||
(next-state-number 1))
|
||||
(setf (gethash initial-kernel lalr-states-hash) initial-state)
|
||||
|
@ -311,7 +380,7 @@
|
|||
(dolist (acons kernel-item-alist)
|
||||
(laitem-add-propagation (cdr acons) (state-laitem destination-state (car acons)) *full-terminalset*))
|
||||
(progn
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist t next-state-number *empty-terminalset*))
|
||||
(setq destination-state (make-state grammar kernel kernel-item-alist :lalr-1 next-state-number *empty-terminalset*))
|
||||
(setf (gethash kernel lalr-states-hash) destination-state)
|
||||
(incf next-state-number)
|
||||
(push destination-state states)
|
||||
|
@ -324,7 +393,7 @@
|
|||
(push (cons shift-symbol-variant (make-shift-transition destination-state))
|
||||
(state-transitions source-state)))
|
||||
grammar shift-symbol kernel-item-alist))))
|
||||
source-state)))
|
||||
source-state :lalr-1)))
|
||||
(setf (grammar-states grammar) (nreverse states))
|
||||
initial-state))
|
||||
|
||||
|
@ -457,33 +526,59 @@
|
|||
(write-char #\newline stream))))
|
||||
|
||||
|
||||
; Remove the temporary item and laitem lists from the grammar's states. This reduces the grammar's lisp
|
||||
; heap usage but prevents it from being printed.
|
||||
(defun clean-grammar (grammar)
|
||||
(when (grammar-items-hash grammar)
|
||||
(setf (grammar-items-hash grammar) nil)
|
||||
(dolist (state (grammar-states grammar))
|
||||
(setf (state-kernel state) nil)
|
||||
(setf (state-laitems state) nil))))
|
||||
|
||||
|
||||
; Erase the existing parser, if any, for the given grammar.
|
||||
(defun clear-parser (grammar)
|
||||
(clrhash (grammar-items-hash grammar))
|
||||
(setf (grammar-items-hash grammar) nil)
|
||||
(setf (grammar-states grammar) nil))
|
||||
|
||||
|
||||
; Construct a LR or LALR parser in the given grammar. kind should be either :lalr-1 or :lr-1.
|
||||
; Construct a LR or LALR parser in the given grammar. kind should be :lalr-1, :lr-1, or :canonical-lr-1.
|
||||
; Return the grammar.
|
||||
(defun compile-parser (grammar kind)
|
||||
(clear-parser grammar)
|
||||
(setf (grammar-items-hash grammar) (make-hash-table :test #'equal))
|
||||
(ecase kind
|
||||
(:lalr-1
|
||||
(add-all-lalr-states grammar)
|
||||
(propagate-lalr-lookaheads grammar))
|
||||
(:lr-1
|
||||
(add-all-lr-states grammar)))
|
||||
(add-all-lr-states grammar))
|
||||
(:canonical-lr-1
|
||||
(add-all-canonical-lr-states grammar)))
|
||||
(finish-transitions grammar)
|
||||
(report-and-fix-ambiguities grammar *error-output*)
|
||||
grammar)
|
||||
|
||||
|
||||
; Make the grammar and compile its parser. kind should be either :lalr-1 or :lr-1.
|
||||
; Make the grammar and compile its parser. kind should be :lalr-1, :lr-1, or :canonical-lr-1.
|
||||
(defun make-and-compile-grammar (kind parametrization start-symbol grammar-source &rest grammar-options)
|
||||
(compile-parser (apply #'make-grammar parametrization start-symbol grammar-source grammar-options)
|
||||
kind))
|
||||
|
||||
|
||||
; Collapse states that have at most one possible reduction into forwarding states.
|
||||
; DON'T DO THIS ON GRAMMARS THAT HAVE CONSTRAINTS AT THE TAIL END OF A PRODUCTION.
|
||||
; Return the number of states optimized.
|
||||
(defun forward-parser-states (grammar)
|
||||
(let ((n-forwarded-states 0))
|
||||
(dolist (state (grammar-states grammar))
|
||||
(let ((production (forwarding-state-production state)))
|
||||
(when production
|
||||
(setf (state-transitions state) (list (cons nil (make-reduce-transition production))))
|
||||
(incf n-forwarded-states))))
|
||||
n-forwarded-states))
|
||||
|
||||
|
||||
;;; ------------------------------------------------------------------------------------------------------
|
||||
|
||||
; Parse the input list of tokens to produce a parse tree.
|
||||
|
@ -501,7 +596,7 @@
|
|||
;input-rest contains the input tokens after the next token.
|
||||
(parse-step-1 (stack terminal token input-rest)
|
||||
(let* ((state (caar stack))
|
||||
(transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
|
||||
(transition (state-transition state terminal)))
|
||||
(if transition
|
||||
(case (transition-kind transition)
|
||||
(:shift (parse-step (acons (transition-state transition) token stack) input-rest))
|
||||
|
@ -648,51 +743,7 @@
|
|||
(defun action-parse (grammar token-terminal input &key trace)
|
||||
(labels
|
||||
(;Continue the parse with the given stacks and remainder of input.
|
||||
(parse-step (state-stack value-stack input)
|
||||
(if (endp input)
|
||||
(parse-step-1 state-stack value-stack *end-marker* nil nil)
|
||||
(let ((token (first input)))
|
||||
(parse-step-1 state-stack value-stack (funcall token-terminal token) token (rest input)))))
|
||||
|
||||
;Same as parse-step except that the next input terminal has been determined already.
|
||||
;input-rest contains the input tokens after the next token.
|
||||
(parse-step-1 (state-stack value-stack terminal token input-rest)
|
||||
(let* ((state (car state-stack))
|
||||
(transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
|
||||
(if transition
|
||||
(case (transition-kind transition)
|
||||
(:shift
|
||||
(dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar)))
|
||||
(push (funcall (cdr action-function-binding) token) value-stack))
|
||||
(parse-step (cons (transition-state transition) state-stack) value-stack input-rest))
|
||||
(:reduce
|
||||
(let* ((production (transition-production transition))
|
||||
(state-stack (nthcdr (production-rhs-length production) state-stack))
|
||||
(state (car state-stack))
|
||||
(dst-state (assert-non-null
|
||||
(cdr (assoc (production-lhs production) (state-gotos state) :test *grammar-symbol-=*))))
|
||||
(value-stack (funcall (production-evaluator production) value-stack)))
|
||||
(parse-step-1 (cons dst-state state-stack) value-stack terminal token input-rest)))
|
||||
(:accept (values (nreverse value-stack) (grammar-user-start-action-types grammar)))
|
||||
(t (error "Bad transition: ~S" transition)))
|
||||
(error "Parse error on ~S followed by ~S ..." token (ldiff input-rest (nthcdr 10 input-rest)))))))
|
||||
|
||||
(if trace
|
||||
(trace-action-parse grammar token-terminal input trace)
|
||||
(parse-step (list (grammar-start-state grammar)) nil input))))
|
||||
|
||||
|
||||
; Same as action-parse, but with tracing information
|
||||
; If trace is:
|
||||
; :code, print trace information, including action code
|
||||
; other print trace information
|
||||
; Return two values:
|
||||
; the list of action results;
|
||||
; the list of action results' types.
|
||||
(defun trace-action-parse (grammar token-terminal input trace)
|
||||
(labels
|
||||
(;Continue the parse with the given stacks and remainder of input.
|
||||
;type-stack contains the types of corresponding value-stack entries.
|
||||
;When trace is non-null, type-stack contains the types of corresponding value-stack entries.
|
||||
(parse-step (state-stack value-stack type-stack input)
|
||||
(if (endp input)
|
||||
(parse-step-1 state-stack value-stack type-stack *end-marker* nil nil)
|
||||
|
@ -703,38 +754,50 @@
|
|||
;input-rest contains the input tokens after the next token.
|
||||
(parse-step-1 (state-stack value-stack type-stack terminal token input-rest)
|
||||
(let* ((state (car state-stack))
|
||||
(transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*))))
|
||||
(format *trace-output* "S~D: ~@_" (state-number state))
|
||||
(print-values (reverse value-stack) (reverse type-stack) *trace-output*)
|
||||
(pprint-newline :mandatory *trace-output*)
|
||||
(transition (state-transition state terminal)))
|
||||
(when trace
|
||||
(format *trace-output* "S~D: ~@_" (state-number state))
|
||||
(print-values (reverse value-stack) (reverse type-stack) *trace-output*)
|
||||
(pprint-newline :mandatory *trace-output*))
|
||||
(if transition
|
||||
(case (transition-kind transition)
|
||||
(:shift
|
||||
(format *trace-output* " shift ~W~:@_" terminal)
|
||||
(when trace
|
||||
(format *trace-output* " shift ~W~:@_" terminal)
|
||||
(dolist (action-signature (grammar-symbol-signature grammar terminal))
|
||||
(push (cdr action-signature) type-stack)))
|
||||
(dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar)))
|
||||
(push (funcall (cdr action-function-binding) token) value-stack))
|
||||
(dolist (action-signature (grammar-symbol-signature grammar terminal))
|
||||
(push (cdr action-signature) type-stack))
|
||||
(parse-step (cons (transition-state transition) state-stack) value-stack type-stack input-rest))
|
||||
|
||||
(:reduce
|
||||
(let ((production (transition-production transition)))
|
||||
(write-string " reduce " *trace-output*)
|
||||
(if (eq trace :code)
|
||||
(write production :stream *trace-output* :pretty t)
|
||||
(print-production production *trace-output*))
|
||||
(pprint-newline :mandatory *trace-output*)
|
||||
(when trace
|
||||
(write-string " reduce " *trace-output*)
|
||||
(if (eq trace :code)
|
||||
(write production :stream *trace-output* :pretty t)
|
||||
(print-production production *trace-output*))
|
||||
(pprint-newline :mandatory *trace-output*))
|
||||
(let* ((state-stack (nthcdr (production-rhs-length production) state-stack))
|
||||
(state (car state-stack))
|
||||
(dst-state (assert-non-null
|
||||
(cdr (assoc (production-lhs production) (state-gotos state) :test *grammar-symbol-=*))))
|
||||
(value-stack (funcall (production-evaluator production) value-stack))
|
||||
(type-stack (nthcdr (production-n-action-args production) type-stack)))
|
||||
(dolist (action-signature (grammar-symbol-signature grammar (production-lhs production)))
|
||||
(push (cdr action-signature) type-stack))
|
||||
(value-stack (funcall (production-evaluator production) value-stack)))
|
||||
(when trace
|
||||
(setq type-stack (nthcdr (production-n-action-args production) type-stack))
|
||||
(dolist (action-signature (grammar-symbol-signature grammar (production-lhs production)))
|
||||
(push (cdr action-signature) type-stack)))
|
||||
(parse-step-1 (cons dst-state state-stack) value-stack type-stack terminal token input-rest))))
|
||||
|
||||
(:accept
|
||||
(format *trace-output* " accept~:@_")
|
||||
(values (nreverse value-stack) (nreverse type-stack)))
|
||||
(when trace
|
||||
(format *trace-output* " accept~:@_"))
|
||||
(values
|
||||
(nreverse value-stack)
|
||||
(if trace
|
||||
(nreverse type-stack)
|
||||
(grammar-user-start-action-types grammar))))
|
||||
|
||||
(t (error "Bad transition: ~S" transition)))
|
||||
(error "Parse error on ~S followed by ~S ..." token (ldiff input-rest (nthcdr 10 input-rest)))))))
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче