зеркало из https://github.com/mozilla/pjs.git
1470 строки
72 KiB
Common Lisp
1470 строки
72 KiB
Common Lisp
;;; The contents of this file are subject to the Mozilla Public
|
|
;;; License Version 1.1 (the "License"); you may not use this file
|
|
;;; except in compliance with the License. You may obtain a copy of
|
|
;;; the License at http://www.mozilla.org/MPL/
|
|
;;;
|
|
;;; Software distributed under the License is distributed on an "AS
|
|
;;; IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
;;; implied. See the License for the specific language governing
|
|
;;; rights and limitations under the License.
|
|
;;;
|
|
;;; The Original Code is the Language Design and Prototyping Environment.
|
|
;;;
|
|
;;; The Initial Developer of the Original Code is Netscape Communications
|
|
;;; Corporation. Portions created by Netscape Communications Corporation are
|
|
;;; Copyright (C) 1999 Netscape Communications Corporation. All
|
|
;;; Rights Reserved.
|
|
;;;
|
|
;;; Contributor(s): Waldemar Horwat <waldemar@acm.org>
|
|
|
|
;;;
|
|
;;; LALR(1) and LR(1) grammar generator
|
|
;;;
|
|
;;; Waldemar Horwat (waldemar@acm.org)
|
|
;;;
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; TERMINALSETS
|
|
|
|
;;; A set of terminals is a bitset represented by an integer, indexed by terminal numbers.
|
|
|
|
(deftype terminalset () 'integer)
|
|
|
|
(defconstant *empty-terminalset* 0)
|
|
(defconstant *full-terminalset* -1)
|
|
|
|
; Return true if terminalset is empty.
|
|
(declaim (inline terminalset-empty?))
|
|
(defun terminalset-empty? (terminalset)
|
|
(zerop terminalset))
|
|
|
|
|
|
; Return true if the two terminalsets are equal.
|
|
(declaim (inline terminalset-=))
|
|
(defun terminalset-= (terminalset1 terminalset2)
|
|
(= terminalset1 terminalset2))
|
|
|
|
|
|
; Return true if the terminalset1 is a subset of (or equal to) terminalset2.
|
|
(declaim (inline terminalset-<=))
|
|
(defun terminalset-<= (terminalset1 terminalset2)
|
|
(zerop (logandc2 terminalset1 terminalset2)))
|
|
|
|
|
|
; Return true if the two terminalsets are disjoint.
|
|
(declaim (inline terminalsets-disjoint))
|
|
(defun terminalsets-disjoint (terminalset1 terminalset2)
|
|
(zerop (logand terminalset1 terminalset2)))
|
|
|
|
|
|
; Return the complement of the terminalset.
|
|
(declaim (inline terminalset-complement))
|
|
(defun terminalset-complement (terminalset)
|
|
(lognot terminalset))
|
|
|
|
|
|
; Return the intersection of the two terminalsets.
|
|
(declaim (inline terminalset-intersection))
|
|
(defun terminalset-intersection (terminalset1 terminalset2)
|
|
(logand terminalset1 terminalset2))
|
|
|
|
(define-modify-macro terminalset-intersection-f (terminalset2) terminalset-intersection)
|
|
|
|
|
|
; Return the union of the two terminalsets.
|
|
(declaim (inline terminalset-union))
|
|
(defun terminalset-union (terminalset1 terminalset2)
|
|
(logior terminalset1 terminalset2))
|
|
|
|
(define-modify-macro terminalset-union-f (terminalset2) terminalset-union)
|
|
|
|
|
|
; Return the elements in terminalset1 that are not in terminalset2.
|
|
(declaim (inline terminalset-difference))
|
|
(defun terminalset-difference (terminalset1 terminalset2)
|
|
(logandc2 terminalset1 terminalset2))
|
|
|
|
(define-modify-macro terminalset-difference-f (terminalset2) terminalset-difference)
|
|
|
|
|
|
; Return a unique serial number for the given terminal.
|
|
(declaim (inline terminal-number))
|
|
(defun terminal-number (grammar terminal)
|
|
(assert-non-null (gethash terminal (grammar-terminal-numbers grammar))
|
|
"Can't find terminal ~S" terminal))
|
|
|
|
|
|
; Return true if the terminal is present in the terminalset.
|
|
(defun terminal-in-terminalset (grammar terminal terminalset)
|
|
(logbitp (terminal-number grammar terminal) terminalset))
|
|
|
|
|
|
; Return a one-element terminalset containing the given terminal.
|
|
(declaim (inline make-terminalset))
|
|
(defun make-terminalset (grammar terminal)
|
|
(ash 1 (terminal-number grammar terminal)))
|
|
|
|
|
|
; Call f on every terminal in the terminalset in reverse order.
|
|
(defun map-terminalset-reverse (f grammar terminalset)
|
|
(do ()
|
|
((zerop terminalset))
|
|
(let ((last (1- (integer-length terminalset))))
|
|
(funcall f (svref (grammar-terminals grammar) last))
|
|
(setq terminalset (- terminalset (ash 1 last))))))
|
|
|
|
|
|
; Return a list containing the terminals in the terminalset.
|
|
(defun terminalset-list (grammar terminalset)
|
|
(let ((terminals nil))
|
|
(map-terminalset-reverse #'(lambda (terminal) (push terminal terminals))
|
|
grammar terminalset)
|
|
terminals))
|
|
|
|
|
|
(defun print-terminalset (grammar terminalset &optional (stream t))
|
|
(pprint-fill stream (terminalset-list grammar terminalset) nil))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; ACTIONS
|
|
|
|
;;; An action describes one semantic function associated with a production.
|
|
;;; The action code is a lisp form that evaluates to a function f that takes
|
|
;;; the following arguments:
|
|
;;; the values returned by the rhs's first grammar symbol's actions, in order of the
|
|
;;; actions of that grammar symbol;
|
|
;;; the values returned by the rhs's second grammar symbol's actions, in order of the
|
|
;;; actions of that grammar symbol;
|
|
;;; ...
|
|
;;; the values returned by the rhs's last grammar symbol's actions, in order of the
|
|
;;; actions of that grammar symbol.
|
|
;;; Function f returns one value, which is the result of this action.
|
|
(defstruct (action (:constructor make-action (expr))
|
|
(:predicate action?))
|
|
(expr nil :read-only t) ;The unparsed source expression that defines the action
|
|
(code nil)) ;The generated lisp source code that performs the action
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; CONSTRAINTS
|
|
|
|
;;; A constraint modifies the rhs of a production. The constraint applies just before the pos-th
|
|
;;; general grammar symbol on the rhs of the production.
|
|
(defstruct (constraint (:constructor nil) (:copier nil) (:predicate constraint?))
|
|
(pos nil :type integer :read-only t)) ;Position of this constraint; ranges between 0 and length(general-production-rhs), inclusive.
|
|
|
|
|
|
;;; A lookahead-constraint imposes a restriction on the current lookahead terminal when matching
|
|
;;; against the production. The match succeeds only if there is no following terminal or the following
|
|
;;; terminal is not present in the lookahead-constraint's list of forbidden terminals.
|
|
(defstruct (lookahead-constraint (:include constraint)
|
|
(:constructor make-lookahead-constraint (pos forbidden-terminals source))
|
|
(:predicate lookahead-constraint?))
|
|
(forbidden-terminals nil :type list :read-only t) ;List of forbidden terminals
|
|
(source nil :type list :read-only t) ;List of grammar symbols (terminals or nonterminals) that produced the list of forbidden terminals
|
|
(terminalset nil :type (or null terminalset))) ;Set of allowed terminals (complement of forbidden-terminals); null until terminals are numbered
|
|
|
|
|
|
; Emit markup for a lookahead-constraint.
|
|
(defun depict-lookahead-constraint (markup-stream lookahead-constraint)
|
|
(depict markup-stream :begin-negative-lookahead)
|
|
(depict-list markup-stream
|
|
#'depict-production-rhs-component
|
|
(lookahead-constraint-source lookahead-constraint)
|
|
:separator ", ")
|
|
(depict markup-stream :end-negative-lookahead))
|
|
|
|
|
|
(defmethod print-object ((lookahead-constraint lookahead-constraint) stream)
|
|
(print-unreadable-object (lookahead-constraint stream)
|
|
(format stream "-~{ ~:_~W~}" (lookahead-constraint-source lookahead-constraint))))
|
|
|
|
|
|
;;; A no-line-break-constraint succeeds only when there is no line break at the current position.
|
|
(defstruct (no-line-break-constraint (:include constraint)
|
|
(:constructor make-no-line-break-constraint (pos))
|
|
(:predicate no-line-break-constraint?)))
|
|
|
|
|
|
; Emit markup for a no-line-break-constraint.
|
|
(defun depict-no-line-break-constraint (markup-stream no-line-break-constraint)
|
|
(declare (ignore no-line-break-constraint))
|
|
(depict markup-stream :no-line-break))
|
|
|
|
|
|
(defmethod print-object ((no-line-break-constraint no-line-break-constraint) stream)
|
|
(print-unreadable-object (no-line-break-constraint stream)))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GENERALIZED PRODUCTIONS
|
|
|
|
;;; A general-production is a common base class for the production and generic-production classes.
|
|
(defstruct (general-production (:constructor nil) (:copier nil) (:predicate general-production?))
|
|
(lhs nil :type general-nonterminal :read-only t) ;The general-nonterminal on the left-hand side of this general-production
|
|
(rhs nil :type list :read-only t) ;List of general grammar symbols to which that general-nonterminal expands
|
|
(constraints nil :type list :read-only t) ;List of constraints applying to rhs, sorted by increasing pos values
|
|
(name nil :read-only t)) ;This general-production's name that will be used to name the parse tree node
|
|
|
|
|
|
; If general-production is a generic-production, return its list of productions;
|
|
; if general-production is a production, return it in a one-element list.
|
|
(defun general-production-productions (general-production)
|
|
(if (production? general-production)
|
|
(list general-production)
|
|
(generic-production-productions general-production)))
|
|
|
|
|
|
; Return the general-production's rhs with constraints interleaved with the rhs general grammar symbols.
|
|
(defun general-production-rhs-components (general-production)
|
|
(labels
|
|
((merge-constraints (rhs constraints pos)
|
|
(if constraints
|
|
(let* ((constraint (first constraints))
|
|
(constraint-pos (constraint-pos constraint)))
|
|
(cond
|
|
((= constraint-pos pos)
|
|
(cons constraint (merge-constraints rhs (rest constraints) pos)))
|
|
(rhs
|
|
(cons (first rhs) (merge-constraints (rest rhs) constraints (1+ pos))))
|
|
(t (error "Bad constraint list"))))
|
|
rhs)))
|
|
|
|
(merge-constraints (general-production-rhs general-production)
|
|
(general-production-constraints general-production)
|
|
0)))
|
|
|
|
|
|
; Return the general-production's lookahead-constraint's terminalset at the given position.
|
|
(defun general-production-lookahead-constraint (general-production pos)
|
|
(dolist (constraint (general-production-constraints general-production) *full-terminalset*)
|
|
(when (and (lookahead-constraint? constraint)
|
|
(= (constraint-pos constraint) pos))
|
|
(return (lookahead-constraint-terminalset constraint)))))
|
|
|
|
|
|
; Emit a markup paragraph for the left-hand-side of a general production.
|
|
(defun depict-general-production-lhs (markup-stream lhs-general-nonterminal)
|
|
(depict-paragraph (markup-stream ':grammar-lhs)
|
|
(depict-general-nonterminal markup-stream lhs-general-nonterminal :definition)
|
|
(depict markup-stream " " ':derives-10)))
|
|
|
|
|
|
; Emit markup for a production right-hand-side component.
|
|
(defun depict-production-rhs-component (markup-stream production-rhs-component &optional subscript)
|
|
(cond
|
|
((lookahead-constraint? production-rhs-component)
|
|
(depict-lookahead-constraint markup-stream production-rhs-component))
|
|
((no-line-break-constraint? production-rhs-component)
|
|
(depict-no-line-break-constraint markup-stream production-rhs-component))
|
|
(t (depict-general-grammar-symbol markup-stream production-rhs-component :reference subscript))))
|
|
|
|
|
|
; Emit a markup paragraph for the right-hand-side of a general production.
|
|
; first is true if this is the first production in a rule.
|
|
; last is true if this is the last production in a rule.
|
|
(defun depict-general-production-rhs (markup-stream general-production first last)
|
|
(depict-paragraph (markup-stream (if last ':grammar-rhs-last ':grammar-rhs))
|
|
(if first
|
|
(depict markup-stream ':tab3)
|
|
(depict markup-stream "|" ':tab2))
|
|
(let ((rhs-components (general-production-rhs-components general-production)))
|
|
(depict-list markup-stream
|
|
#'depict-production-rhs-component
|
|
rhs-components
|
|
:separator " "
|
|
:empty '(:left-angle-quote "empty" :right-angle-quote)))))
|
|
|
|
|
|
; Emit the general production, including both its left and right-hand sides.
|
|
; Include serial number subscripts on all rhs grammar symbols that both
|
|
; appear more than once in the rhs or appear in the lhs, and
|
|
; have symbols that are present in the symbols-with-subscripts list.
|
|
; link is the lhs's link type.
|
|
(defun depict-general-production (markup-stream general-production link &optional symbols-with-subscripts)
|
|
(let ((lhs (general-production-lhs general-production))
|
|
(rhs-components (general-production-rhs-components general-production)))
|
|
(depict-general-nonterminal markup-stream lhs link)
|
|
(depict markup-stream " " ':derives-10)
|
|
(if rhs-components
|
|
(let ((counts-hash (make-hash-table :test *grammar-symbol-=*)))
|
|
(when symbols-with-subscripts
|
|
(dolist (symbol symbols-with-subscripts)
|
|
(setf (gethash symbol counts-hash) 0))
|
|
(dolist (production-rhs-component (cons lhs (general-production-rhs general-production)))
|
|
(when (general-grammar-symbol? production-rhs-component)
|
|
(let ((symbol (general-grammar-symbol-symbol production-rhs-component)))
|
|
(when (gethash symbol counts-hash)
|
|
(incf (gethash symbol counts-hash))))))
|
|
(maphash #'(lambda (symbol count)
|
|
(assert-true (> count 0))
|
|
(if (> count 1)
|
|
(setf (gethash symbol counts-hash) 0)
|
|
(remhash symbol counts-hash)))
|
|
counts-hash))
|
|
(dolist (production-rhs-component rhs-components)
|
|
(let ((subscript nil))
|
|
(when (general-grammar-symbol? production-rhs-component)
|
|
(let ((symbol (general-grammar-symbol-symbol production-rhs-component)))
|
|
(when (gethash symbol counts-hash)
|
|
(setq subscript (incf (gethash symbol counts-hash))))))
|
|
(depict-space markup-stream)
|
|
(depict-production-rhs-component markup-stream production-rhs-component subscript))))
|
|
(depict markup-stream " " ':left-angle-quote "empty" :right-angle-quote))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; PRODUCTIONS
|
|
|
|
;;; A production describes the expansion of a nonterminal (the lhs) into
|
|
;;; a string of zero or more grammar symbols (the rhs); the rhs may also have constraints attached.
|
|
;;; Each production has a unique number. Earlier productions have smaller numbers.
|
|
;;; There is exactly one production structure for a given production, so eq can be
|
|
;;; used to test for production equality.
|
|
;;;
|
|
;;; The evaluator is a lisp form that evaluates to a function f that takes one argument --
|
|
;;; the old state of the parser's value stack -- and returns the new state of that stack.
|
|
(defstruct (production (:include general-production (lhs nil :type nonterminal :read-only t))
|
|
(:constructor make-production (lhs rhs constraints name rhs-length number))
|
|
(:copier nil) (:predicate production?))
|
|
(rhs-length nil :type integer :read-only t) ;Number of grammar symbols in the rhs
|
|
(number nil :type integer :read-only t) ;This production's serial number
|
|
;The following fields are used for the action generator.
|
|
(actions nil :type list) ;List of (action-symbol . action-or-nil) in the same order as the action-symbols
|
|
; ; are listed in the grammar's action-signatures hash table for this lhs
|
|
(n-action-args nil :type (or null integer)) ;Total size of the action-signatures of all grammar symbols in the rhs
|
|
(evaluator-code nil) ;The lisp evaluator's source code
|
|
(evaluator nil :type (or null function))) ;The lisp evaluator of the action
|
|
|
|
|
|
; Return a list of terminals in this production's rhs.
|
|
; The list may contain duplicates.
|
|
(declaim (inline production-terminals))
|
|
(defun production-terminals (production)
|
|
(remove-if (complement #'terminal?) (production-rhs production)))
|
|
|
|
|
|
; Return a list of nonterminals in this production's rhs.
|
|
; The list may contain duplicates.
|
|
(declaim (inline production-nonterminals))
|
|
(defun production-nonterminals (production)
|
|
(remove-if (complement #'nonterminal?) (production-rhs production)))
|
|
|
|
|
|
(defun print-production (production &optional (stream t))
|
|
(format stream "~<~W -> ~:I~_~:[<epsilon> ~;~:*~{~W ~:_~}~]~:> ~:_[~W]"
|
|
(list (production-lhs production) (general-production-rhs-components production))
|
|
(production-name production)))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GENERIC PRODUCTIONS
|
|
|
|
;;; A generic production describes a set of productions generated by instantiating a
|
|
;;; production with a generic nonterminal on the lhs. All productions in this set are
|
|
;;; generated by replacing the arguments in the lhs nonterminal with attributes and making
|
|
;;; corresponding replacements on the production's rhs. All productions in this set share
|
|
;;; the same name. Note that this set might not cover all productions with the same
|
|
;;; nonterminal symbol on the lhs because some parameters of the lhs nonterminal might be
|
|
;;; originally listed as attributes and not arguments.
|
|
;;; A generic production is not a production and does not have a number or actions.
|
|
|
|
(defstruct (generic-production (:include general-production (lhs nil :type generic-nonterminal :read-only t))
|
|
(:constructor make-generic-production (lhs rhs constraints name productions))
|
|
(:copier nil)
|
|
(:predicate generic-production?))
|
|
(productions nil :type list :read-only t)) ;List of instantiations of this generic production
|
|
|
|
|
|
; Return a new generic-production or production with the given nonterminal argument replaced by the given attribute.
|
|
; If the resulting production isn't generic, an existing attributed production is returned.
|
|
(defun generic-production-substitute (grammar-parametrization attribute argument generic-production)
|
|
(let ((new-lhs (general-grammar-symbol-substitute attribute argument (generic-production-lhs generic-production)))
|
|
(productions (generic-production-productions generic-production)))
|
|
(if (generic-nonterminal? new-lhs)
|
|
(make-generic-production
|
|
new-lhs
|
|
(mapcar #'(lambda (grammar-symbol)
|
|
(general-grammar-symbol-substitute attribute argument grammar-symbol))
|
|
(generic-production-rhs generic-production))
|
|
(general-production-constraints generic-production)
|
|
(generic-production-name generic-production)
|
|
(remove-if #'(lambda (production)
|
|
(not (general-nonterminal-is-instance? grammar-parametrization new-lhs (production-lhs production))))
|
|
productions))
|
|
|
|
(assert-non-null (find new-lhs productions :key #'production-lhs :test #'eq)))))
|
|
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GENERALIZED RULES
|
|
|
|
;;; A general-rule is a common base class for the rule and generic-rule classes.
|
|
(defstruct (general-rule (:constructor nil) (:copier nil) (:predicate general-rule?))
|
|
(productions nil :type list)) ;The list of all general productions for this rule's general nonterminal lhs
|
|
|
|
|
|
; Return this rule's lhs (which is the same for all productions).
|
|
(declaim (inline general-rule-lhs))
|
|
(defun general-rule-lhs (general-rule)
|
|
(general-production-lhs (first (general-rule-productions general-rule))))
|
|
|
|
|
|
; Emit markup paragraphs for the grammar general rule.
|
|
; If the rule is short enough (only one production), emit the rule on one line.
|
|
(defun depict-general-rule (markup-stream general-rule)
|
|
(depict-block-style (markup-stream ':grammar-rule)
|
|
(let ((general-productions (general-rule-productions general-rule)))
|
|
(assert-true general-productions)
|
|
(if (cdr general-productions)
|
|
(labels
|
|
((emit-general-productions (general-productions first)
|
|
(let ((general-production (first general-productions))
|
|
(rest (rest general-productions)))
|
|
(depict-general-production-rhs markup-stream general-production first (endp rest))
|
|
(when rest
|
|
(emit-general-productions rest nil)))))
|
|
(depict-general-production-lhs markup-stream (general-rule-lhs general-rule))
|
|
(emit-general-productions general-productions t))
|
|
(depict-paragraph (markup-stream ':grammar-lhs-last)
|
|
(depict-general-production markup-stream (first general-productions) :definition))))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; RULES
|
|
|
|
;;; A rule is the set of all productions with the same lhs nonterminal.
|
|
;;; There is exactly one rule structure for a given nonterminal lhs, so eq can be
|
|
;;; used to test for rule equality.
|
|
;;;
|
|
;;; If the rule cannot somehow produce an empty expansion, then passthrough-terminals is empty.
|
|
;;; Otherwise, passthrough-terminals summarizes the constraints imposed on the next lookahead terminal
|
|
;;; imposed by all empty expansions of this rule. If these empty expansions do not impose any
|
|
;;; lookahead-constraints, then passthrough-terminals will be the full set.
|
|
(defstruct (rule (:include general-rule (productions nil :type list :read-only t))
|
|
(:constructor make-rule (productions))
|
|
(:copier nil)
|
|
(:predicate rule?))
|
|
;productions ;The list of all productions for this rule's nonterminal lhs
|
|
(number nil :type (or null integer)) ;This nonterminal's serial number
|
|
(passthrough-terminals *empty-terminalset* :type terminalset) ;See above
|
|
(initial-terminals *empty-terminalset* :type terminalset)) ;Set of all terminals that can begin some expansion of this nonterminal
|
|
|
|
|
|
; Return a list of nonterminals in this rule's rhs.
|
|
; The list may contain duplicates.
|
|
(defun rule-nonterminals (rule)
|
|
(mapcan #'(lambda (production) (copy-list (production-nonterminals production)))
|
|
(rule-productions rule)))
|
|
|
|
|
|
; Return a unique serial number for the given nonterminal.
|
|
(defun nonterminal-number (grammar nonterminal)
|
|
(rule-number (assert-non-null (gethash nonterminal (grammar-rules grammar))
|
|
"Can't find nonterminal ~S" nonterminal)))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GENERIC RULES
|
|
|
|
;;; A generic rule is the set of all generic productions with the same lhs generic nonterminal.
|
|
(defstruct (generic-rule (:include general-rule)
|
|
(:constructor make-generic-rule (productions))
|
|
(:copier nil)
|
|
(:predicate generic-rule?)))
|
|
; productions ;The list of all generic-productions for this rule's generic nonterminal lhs
|
|
|
|
|
|
; Return a new generic-rule with the given nonterminal argument replaced by the given attribute.
|
|
; The resulting rule must still be generic -- it is an error to call this so that it would result
|
|
; in a rule with a plain or attributed lhs nonterminal.
|
|
(defun generic-rule-substitute (grammar-parametrization attribute argument generic-rule)
|
|
(make-generic-rule
|
|
(mapcar #'(lambda (generic-production)
|
|
(assert-type (generic-production-substitute grammar-parametrization attribute argument generic-production)
|
|
generic-production))
|
|
(generic-rule-productions generic-rule))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GRAMMAR ITEMS
|
|
|
|
;;; An item is a tuple <prod, position>, where prod is a production and
|
|
;;; position is an integer between 0 and length(rhs(production)), inclusive.
|
|
;;; The first position elements of production's rhs are called "seen"
|
|
;;; and the rest are called "unseen".
|
|
;;; There is exactly one item structure for a given <prod, position> combination,
|
|
;;; so eq can be used to test for item equality.
|
|
(defstruct (item (:constructor allocate-item (production dot unseen number next))
|
|
(:copier nil)
|
|
(:predicate item?))
|
|
(production nil :type production :read-only t) ;The production to which this item corresponds
|
|
(dot nil :type integer :read-only t) ;List of grammar symbols to which that nonterminal expands
|
|
(unseen nil :type list :read-only t) ;Portion of production's rhs to the right of the dot
|
|
(number nil :type integer :read-only t) ;Unique number (i.e. different from any other item's number)
|
|
(next nil :type item :read-only t)) ;The item with the same production but dot shifted one to the right; nil if unseen is nil
|
|
|
|
|
|
; Return the first grammar symbol to the right of the item's dot or nil if
|
|
; the dot is already at the rightmost position.
|
|
(declaim (inline item-next-symbol))
|
|
(defun item-next-symbol (item)
|
|
(first (item-unseen item)))
|
|
|
|
|
|
; Return the lhs of the item's production.
|
|
(declaim (inline item-lhs))
|
|
(defun item-lhs (item)
|
|
(production-lhs (item-production item)))
|
|
|
|
|
|
; Return the constraints of the item's production.
|
|
(declaim (inline item-constraints))
|
|
(defun item-constraints (item)
|
|
(production-constraints (item-production item)))
|
|
|
|
|
|
; Make an item with the given production and dot location (which must be an integer
|
|
; between 0 and length(rhs(production)), inclusive. Reuse an existing item in the
|
|
; grammar if possible.
|
|
(defun make-item (grammar production dot)
|
|
(let* ((items-hash (grammar-items-hash grammar))
|
|
(key (cons production dot))
|
|
(existing-item (gethash key items-hash)))
|
|
(or
|
|
existing-item
|
|
(progn
|
|
(assert-true (<= dot (length (production-rhs production))))
|
|
(let* ((unseen (nthcdr dot (production-rhs production)))
|
|
(next (and unseen (make-item grammar production (1+ dot))))
|
|
(number (+ (* (production-number production) (1+ (grammar-max-production-length grammar))) dot)))
|
|
(setf (gethash key items-hash)
|
|
(allocate-item production dot unseen number next)))))))
|
|
|
|
|
|
(defun print-item (item &optional (stream t) after-dot)
|
|
(let ((production (item-production item)))
|
|
(format stream "~W -> ~:_" (production-lhs production))
|
|
(pprint-logical-block (stream (production-rhs production))
|
|
(do ((pos 0 (1+ pos))
|
|
(constraints (production-constraints production))
|
|
(first t))
|
|
()
|
|
(when (= pos (item-dot item))
|
|
(if first
|
|
(setq first nil)
|
|
(format stream " ~:_"))
|
|
(write-char #\. stream)
|
|
(when after-dot
|
|
(format stream " ~:_~W" after-dot)))
|
|
(do ()
|
|
((or (endp constraints) (/= (constraint-pos (first constraints)) pos)))
|
|
(if first
|
|
(setq first nil)
|
|
(format stream " ~:_"))
|
|
(write (pop constraints) :stream stream))
|
|
(pprint-exit-if-list-exhausted)
|
|
(if first
|
|
(setq first nil)
|
|
(format stream " ~:_"))
|
|
(write (pprint-pop) :stream stream)))))
|
|
|
|
(defmethod print-object ((item item) stream)
|
|
(print-unreadable-object (item stream)
|
|
(format stream "item ~@_")
|
|
(print-item item stream)))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GRAMMAR LAITEMS
|
|
|
|
;;; A laitem is an item with associated lookahead information.
|
|
;;; Unlike items, laitem structures are not shared among the states.
|
|
(defstruct (laitem (:constructor allocate-laitem (grammar item forbidden lookaheads))
|
|
(:copier nil)
|
|
(:predicate laitem?))
|
|
(grammar nil :type grammar :read-only t) ;The grammar to which this laitem belongs
|
|
(item nil :type item :read-only t) ;The item to which this laitem corresponds
|
|
(forbidden nil :type terminalset :read-only t) ;A set of terminals that must not occur after the dot because of lookahead-constraints
|
|
(lookaheads nil :type terminalset) ;Set of lookahead terminals
|
|
(propagates nil :type list)) ;List of (laitem . mask) to which lookaheads propagate from this laitem (see note below)
|
|
;When parsing a LALR(1) grammar, propagates contains all laitems (in this and other states)
|
|
;to which lookaheads propagate from this laitem. The mask in each entry of propagates
|
|
;is a terminalset that indicates which lookaheads can propagate; this is usually *full-terminalset*
|
|
;but can be smaller in the presence of constraints.
|
|
;When parsing a LR(1) grammar, propagates is the list of all laitems to which lookaheads propagate from this laitem
|
|
;without following a shift transition. Such laitems must necessarily be in the same state. In the LR(1) case each
|
|
;laitem listed in the propagates list must come after this laitem in this state's laitems list.
|
|
|
|
|
|
; Add or modify a propagation entry in src-laitem to point to dst-laitem with
|
|
; the given mask.
|
|
(defun laitem-add-propagation (src-laitem dst-laitem mask)
|
|
(let ((propagation-entry (assoc dst-laitem (laitem-propagates src-laitem))))
|
|
(if propagation-entry
|
|
(terminalset-union-f (cdr propagation-entry) mask)
|
|
(push (cons dst-laitem mask) (laitem-propagates src-laitem)))))
|
|
|
|
|
|
(defvar *lookahead-print-column* 70)
|
|
|
|
(defun print-laitem (laitem &optional (stream t))
|
|
(let* ((grammar (laitem-grammar laitem))
|
|
(item (laitem-item laitem))
|
|
(forbidden (laitem-forbidden laitem))
|
|
(forbidden-as-constraint
|
|
(and (not (terminalset-empty? forbidden))
|
|
(let ((forbidden-terminals (terminalset-list grammar forbidden)))
|
|
(make-lookahead-constraint (item-dot item) forbidden-terminals forbidden-terminals)))))
|
|
(print-item item stream forbidden-as-constraint)
|
|
(format stream " ~vT~_" *lookahead-print-column*)
|
|
(pprint-logical-block (stream nil :prefix "{" :suffix "}")
|
|
(print-terminalset grammar (laitem-lookaheads laitem) stream))))
|
|
|
|
(defmethod print-object ((laitem laitem) stream)
|
|
(print-unreadable-object (laitem stream)
|
|
(format stream "laitem ~@_")
|
|
(print-laitem laitem stream)))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GRAMMAR TRANSITIONS
|
|
|
|
;;; A grammar transition is one of the following:
|
|
;;; (:shift state) ;Shift and go to the given state
|
|
;;; (:reduce production) ;Reduce by the given production
|
|
;;; (:accept) ;Accept the input
|
|
|
|
(declaim (inline make-shift-transition))
|
|
(defun make-shift-transition (state)
|
|
(assert-type state state)
|
|
(list :shift state))
|
|
|
|
(declaim (inline make-reduce-transition))
|
|
(defun make-reduce-transition (production)
|
|
(assert-type production production)
|
|
(list :reduce production))
|
|
|
|
(declaim (inline make-accept-transition))
|
|
(defun make-accept-transition ()
|
|
(list :accept))
|
|
|
|
|
|
(declaim (inline transition-kind))
|
|
(defun transition-kind (transition)
|
|
(first transition))
|
|
|
|
(declaim (inline transition-state))
|
|
(defun transition-state (transition)
|
|
(assert-true (eq (first transition) :shift))
|
|
(second transition))
|
|
|
|
(defun (setf transition-state) (state transition)
|
|
(assert-true (eq (first transition) :shift))
|
|
(setf (second transition) state))
|
|
|
|
(declaim (inline transition-production))
|
|
(defun transition-production (transition)
|
|
(assert-true (eq (first transition) :reduce))
|
|
(second transition))
|
|
|
|
|
|
(defun print-transition (transition stream)
|
|
(case (transition-kind transition)
|
|
(:shift (format stream "shift S~D" (state-number (transition-state transition))))
|
|
(:reduce (format stream "reduce ~W" (production-name (transition-production transition))))
|
|
(:accept (format stream "accept"))
|
|
(t (error "Bad transition: ~S" transition))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GRAMMAR STATES
|
|
|
|
(defstruct (state (:constructor allocate-state (number kernel laitems))
|
|
(:copier nil)
|
|
(:predicate state?))
|
|
(number nil :type integer :read-only t) ;Serial number of the state
|
|
(kernel nil :type list :read-only t) ;List of kernel items in order of increasing item-number values
|
|
(laitems nil :type list :read-only t) ;List of laitems (topologically sorted by the propagates relation when parsing LR(1))
|
|
(transitions nil :type list) ;List of (terminal . transition)
|
|
(gotos nil :type list)) ;List of (nonterminal . state)
|
|
|
|
|
|
; If all outgoing transitions from the state are the same reduction, return that
|
|
; reduction's production; otherwise return nil.
|
|
(defun forwarding-state-production (state)
|
|
(let ((transitions (state-transitions state)))
|
|
(and transitions
|
|
(let ((transition (cdar transitions)))
|
|
(and (eq (transition-kind transition) :reduce)
|
|
(null (rassoc transition (cdr transitions) :test (complement #'equal)))
|
|
(transition-production transition))))))
|
|
|
|
|
|
; Return the laitem corresponding to the given item in the given state.
|
|
(declaim (inline state-laitem))
|
|
(defun state-laitem (state item)
|
|
(assert-non-null (find item (state-laitems state) :key #'laitem-item)
|
|
"State ~S doesn't have item ~S" state item))
|
|
|
|
|
|
; Return true if this laitem is in the state's kernel either because the dot
|
|
; is not at the leftmost position or because the lhs is the start nonterminal.
|
|
(defun laitem-in-kernel? (laitem)
|
|
(let ((item (laitem-item laitem)))
|
|
(or (not (zerop (item-dot item)))
|
|
(grammar-symbol-= (item-lhs item) *start-nonterminal*))))
|
|
|
|
|
|
; Return the state's list of laitems sorted by the criteria below.
|
|
; Criterion 1 is the major sorting order, criterion 2 decides the sorting order of items
|
|
; equal by criterion 1, etc.
|
|
; 1. laitems whose lhs matches the lhs of any kernel item in this state come before
|
|
; laitems whose lhs does not match the lhs of any kernel item in this state.
|
|
; 2. laitems are sorted by the number of their lhs nonterminal.
|
|
; 3. laitems in this state's kernel come before laitems not in the kernel.
|
|
; 4. laitems earlier in (state-laitems state) come before laitems later in (state-laitems state).
|
|
(defun laitems-sorted-nicely (state)
|
|
(let ((laitems (copy-list (state-laitems state))))
|
|
(when laitems
|
|
(let ((grammar (laitem-grammar (first laitems))))
|
|
(labels
|
|
((lhs-matches-some-kernel-item (lhs-nonterminal)
|
|
(member lhs-nonterminal (state-kernel state) :test *grammar-symbol-=* :key #'item-lhs))
|
|
(laitem-< (laitem1 laitem2)
|
|
(let* ((item1 (laitem-item laitem1))
|
|
(item2 (laitem-item laitem2))
|
|
(lhs1 (item-lhs item1))
|
|
(lhs2 (item-lhs item2))
|
|
(lhs-number-1 (nonterminal-number grammar lhs1))
|
|
(lhs-number-2 (nonterminal-number grammar lhs2))
|
|
(item1-lhs-matches-some-kernel-item (lhs-matches-some-kernel-item lhs1))
|
|
(item2-lhs-matches-some-kernel-item (lhs-matches-some-kernel-item lhs2)))
|
|
(cond
|
|
((and item1-lhs-matches-some-kernel-item (not item2-lhs-matches-some-kernel-item)) t)
|
|
((and (not item1-lhs-matches-some-kernel-item) item2-lhs-matches-some-kernel-item) nil)
|
|
((< lhs-number-1 lhs-number-2) t)
|
|
((> lhs-number-1 lhs-number-2) nil)
|
|
(t (let ((item1-in-kernel (laitem-in-kernel? laitem1))
|
|
(item2-in-kernel (laitem-in-kernel? laitem2)))
|
|
(cond
|
|
((and item1-in-kernel (not item2-in-kernel)) t)
|
|
((and (not item1-in-kernel) item2-in-kernel) nil)
|
|
(t (dolist (state-laitem (state-laitems state) nil)
|
|
(cond
|
|
((eq state-laitem laitem2) (return nil))
|
|
((eq state-laitem laitem1) (return t))))))))))))
|
|
(stable-sort laitems #'laitem-<))))))
|
|
|
|
|
|
(defun print-state (state &optional (stream t))
|
|
(pprint-logical-block (stream nil)
|
|
(format stream "S~D:" (state-number state))
|
|
(pprint-indent :block 2 stream)
|
|
(pprint-newline :mandatory stream)
|
|
(pprint-logical-block (stream (laitems-sorted-nicely state))
|
|
(do ((first t nil))
|
|
()
|
|
(pprint-exit-if-list-exhausted)
|
|
(unless first
|
|
(pprint-newline :mandatory stream))
|
|
(let ((laitem (pprint-pop)))
|
|
(pprint-logical-block (stream nil)
|
|
(unless (laitem-in-kernel? laitem)
|
|
(write-string " " stream))
|
|
(pprint-indent :block 4 stream)
|
|
(print-laitem laitem stream)))))
|
|
(when (state-transitions state)
|
|
(pprint-newline :mandatory stream)
|
|
(pprint-logical-block (stream (collect-equivalences (state-transitions state) :test #'equal) :prefix "Transitions: ")
|
|
(loop
|
|
(pprint-exit-if-list-exhausted)
|
|
(let ((transition-cons (pprint-pop)))
|
|
(pprint-logical-block (stream nil)
|
|
(pprint-fill stream (car transition-cons) nil)
|
|
(format stream " ~2I~_=> " (car transition-cons))
|
|
(print-transition (cdr transition-cons) stream))
|
|
(format stream " ~:_")))))
|
|
(when (state-gotos state)
|
|
(pprint-newline :mandatory stream)
|
|
(pprint-logical-block (stream (state-gotos state) :prefix "Gotos: ")
|
|
(loop
|
|
(pprint-exit-if-list-exhausted)
|
|
(let ((goto-cons (pprint-pop)))
|
|
(format stream "~@<~W ~:_=> ~:_S~D~:> ~:_" (car goto-cons) (state-number (cdr goto-cons)))))))))
|
|
|
|
|
|
(defmethod print-object ((state state) stream)
|
|
(print-unreadable-object (state stream)
|
|
(format stream "state S~D" (state-number state))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; PARAMETER TREES
|
|
|
|
;;; A parameter tree describes which nonterminal parameters are generic and which are specific in all
|
|
;;; productions derived from the same nonterminal symbol. The tree has as many levels as there are
|
|
;;; parameters for that nonterminal symbol. A level is generic if it is generic in all applicable
|
|
;;; productions; otherwise it is specific.
|
|
;;;
|
|
;;; The parameter tree (and each of its subtrees) can have one of three forms:
|
|
;;; (:rule <general-rule>)
|
|
;;; if the generic nonterminal has no remaining parameters;
|
|
;;; (:argument <argument> <parameter-subtree>)
|
|
;;; if the first remaining nonterminal parameter is an argument in all productions seen so far;
|
|
;;; (:attributes <argument-or-nil> (<attribute> . <parameter-subtree>) (<attribute> . <parameter-subtree>) ...)
|
|
;;; if the first remaining nonterminal parameter can be one of several attributes in productions seen so far;
|
|
;;; if argument-or-nil is not nil, it is an argument that includes all of the provided attributes.
|
|
;;;
|
|
;;; The parameter tree (and each of its subtrees) is mutable and updated in place.
|
|
;;;
|
|
|
|
|
|
; Create and return an initial parameter tree for the given remaining parameters
|
|
; of the general-production.
|
|
(defun make-parameter-subtree (grammar parameters general-production)
|
|
(cond
|
|
(parameters
|
|
(let ((parameter (first parameters))
|
|
(subtree (make-parameter-subtree grammar (rest parameters) general-production)))
|
|
(if (nonterminal-argument? parameter)
|
|
(list ':argument parameter subtree)
|
|
(list ':attributes nil (cons parameter subtree)))))
|
|
((production? general-production)
|
|
(list ':rule (grammar-rule grammar (production-lhs general-production))))
|
|
(t
|
|
(assert-true (generic-production? general-production))
|
|
(list ':rule (make-generic-rule (list general-production))))))
|
|
|
|
|
|
; Create and return an initial parameter tree for the general-production.
|
|
(defun make-parameter-tree (grammar general-production)
|
|
(make-parameter-subtree grammar
|
|
(general-nonterminal-parameters (general-production-lhs general-production))
|
|
general-production))
|
|
|
|
|
|
; The parameter subtree has kind :argument at its top level. Convert it in place
|
|
; into kind :attributes.
|
|
(defun parameter-subtree-divide-argument (grammar parameter-subtree)
|
|
(let ((argument (second parameter-subtree))
|
|
(argument-subtree (third parameter-subtree)))
|
|
(labels
|
|
((substitute-argument-with (attribute subtree)
|
|
(ecase (first subtree)
|
|
(:rule
|
|
(let* ((general-rule (second subtree))
|
|
(lhs (general-rule-lhs general-rule))
|
|
(new-lhs (general-grammar-symbol-substitute attribute argument lhs)))
|
|
(assert-true (generic-rule? general-rule))
|
|
(list ':rule
|
|
(if (generic-nonterminal? new-lhs)
|
|
(generic-rule-substitute grammar attribute argument general-rule)
|
|
(grammar-rule grammar new-lhs)))))
|
|
(:argument
|
|
(list ':argument
|
|
(second subtree)
|
|
(substitute-argument-with attribute (third subtree))))
|
|
(:attributes
|
|
(list ':attributes
|
|
(second subtree)
|
|
(mapcar #'(lambda (argument-subtree-binding)
|
|
(cons (car argument-subtree-binding)
|
|
(substitute-argument-with attribute (cdr argument-subtree-binding))))
|
|
(cddr subtree))))))
|
|
|
|
(create-attribute-subtree-binding (attribute)
|
|
(cons attribute (substitute-argument-with attribute argument-subtree))))
|
|
|
|
(setf (first parameter-subtree) ':attributes)
|
|
(setf (cddr parameter-subtree)
|
|
(mapcar #'create-attribute-subtree-binding
|
|
(grammar-parametrization-lookup-argument grammar argument))))))
|
|
|
|
|
|
; Mutate the parameter subtree to add the general-production to its rules.
|
|
; parameters are the nonterminal parameters for the remaining subtree levels.
|
|
(defun parameter-subtree-add-production (grammar parameter-subtree parameters general-production)
|
|
(let ((lhs (general-production-lhs general-production))
|
|
(kind (first parameter-subtree)))
|
|
(cond
|
|
((eq kind :rule)
|
|
(let ((general-rule (second parameter-subtree)))
|
|
(cond
|
|
(parameters
|
|
(error "Extra nonterminal parameters ~S for ~S" parameters lhs))
|
|
((production? general-production)
|
|
(assert-true (member general-production (rule-productions general-rule) :test #'eq)))
|
|
(t
|
|
(assert-true (generic-production? general-production))
|
|
(assert-true (not (member general-production (generic-rule-productions general-rule) :test #'eq)))
|
|
(setf (generic-rule-productions general-rule)
|
|
(nconc (generic-rule-productions general-rule) (list general-production)))))))
|
|
((null parameters)
|
|
(error "Missing nonterminal parameters in ~S" lhs))
|
|
(t
|
|
(let ((parameter (first parameters))
|
|
(parameters-rest (rest parameters)))
|
|
(ecase kind
|
|
(:argument
|
|
(let ((argument (second parameter-subtree))
|
|
(argument-subtree (third parameter-subtree)))
|
|
(if (nonterminal-argument? parameter)
|
|
(if (eq parameter argument)
|
|
(parameter-subtree-add-production grammar argument-subtree parameters-rest general-production)
|
|
(error "Nonterminal argument conflict: ~S vs. ~S" parameter argument))
|
|
(progn
|
|
(parameter-subtree-divide-argument grammar parameter-subtree)
|
|
(parameter-subtree-add-production grammar parameter-subtree parameters general-production)))))
|
|
(:attributes
|
|
(let ((argument (second parameter-subtree))
|
|
(attribute-subtree-bindings (cddr parameter-subtree)))
|
|
(if (nonterminal-argument? parameter)
|
|
(let ((argument-attributes (grammar-parametrization-lookup-argument grammar parameter)))
|
|
(labels
|
|
((add-attribute-production (attribute-subtree-binding)
|
|
(let ((attribute (car attribute-subtree-binding)))
|
|
(unless (member attribute argument-attributes :test #'eq)
|
|
(error "Attribute ~S is not a member of argument ~S" attribute parameter))
|
|
(parameter-subtree-add-production
|
|
grammar
|
|
(cdr attribute-subtree-binding)
|
|
parameters-rest
|
|
(generic-production-substitute grammar attribute parameter general-production)))))
|
|
(cond
|
|
((null argument)
|
|
(mapc #'add-attribute-production attribute-subtree-bindings)
|
|
(setf (cddr parameter-subtree)
|
|
(nconc attribute-subtree-bindings
|
|
(mapcan #'(lambda (attribute)
|
|
(unless (assoc attribute attribute-subtree-bindings :test #'eq)
|
|
(list
|
|
(cons attribute
|
|
(make-parameter-subtree
|
|
grammar
|
|
parameters-rest
|
|
(generic-production-substitute grammar attribute parameter general-production))))))
|
|
argument-attributes)))
|
|
(setf (second parameter-subtree) parameter))
|
|
((eq parameter argument)
|
|
(assert-true (= (length attribute-subtree-bindings) (length argument-attributes)))
|
|
(mapc #'add-attribute-production attribute-subtree-bindings))
|
|
(t (error "Nonterminal argument conflict: ~S vs. ~S" parameter argument)))))
|
|
|
|
(let ((attribute-subtree-binding (assoc parameter attribute-subtree-bindings :test #'eq)))
|
|
(cond
|
|
(attribute-subtree-binding
|
|
(parameter-subtree-add-production grammar (cdr attribute-subtree-binding) parameters-rest general-production))
|
|
(argument
|
|
(error "Attribute ~S is not a member of argument ~S" parameter argument))
|
|
(t
|
|
(setf (cddr parameter-subtree)
|
|
(nconc attribute-subtree-bindings
|
|
(list (cons parameter
|
|
(make-parameter-subtree grammar parameters-rest general-production)))))))))))))))))
|
|
|
|
|
|
; Mutate the parameter tree to add the general-production to its rules.
|
|
(defun parameter-tree-add-production (grammar parameter-tree general-production)
|
|
(parameter-subtree-add-production grammar
|
|
parameter-tree
|
|
(general-nonterminal-parameters (general-production-lhs general-production))
|
|
general-production)
|
|
parameter-tree)
|
|
|
|
|
|
; Return a freshly consed list of all rules and generic-rules in the parameter tree.
|
|
(defun parameter-tree-general-rules (parameter-tree)
|
|
(ecase (first parameter-tree)
|
|
(:rule (list (second parameter-tree)))
|
|
(:argument (parameter-tree-general-rules (third parameter-tree)))
|
|
(:attributes
|
|
(mapcan
|
|
#'(lambda (argument-subtree-binding)
|
|
(parameter-tree-general-rules (cdr argument-subtree-binding)))
|
|
(cddr parameter-tree)))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; GRAMMAR
|
|
|
|
(defstruct (grammar (:include grammar-parametrization)
|
|
(:constructor allocate-grammar)
|
|
(:copier nil)
|
|
(:predicate grammar?))
|
|
(terminals nil :type simple-vector :read-only t) ;Vector of all terminals (in order of terminal numbers)
|
|
(nonterminals nil :type simple-vector :read-only t) ;Vector of all nonterminals (in a depth-first order)
|
|
(nonterminals-list nil :type list :read-only t) ;List version of the nonterminals vector
|
|
(terminal-numbers nil :type hash-table :read-only t) ;Hash table of terminal -> terminal number
|
|
(terminal-actions nil :type hash-table :read-only t) ;Hash table of terminal -> list of (action-symbol . action-function-or-nil)
|
|
(rules nil :type hash-table :read-only t) ;Hash table of nonterminal -> rule
|
|
(parameter-trees nil :type hash-table :read-only t) ;Hash table of nonterminal-symbol -> parameter-tree
|
|
(max-production-length nil :type integer :read-only t) ;Maximum number of grammar symbols in the rhs of a production
|
|
(general-productions nil :type hash-table :read-only t);Hash table of production-name -> general-production
|
|
(n-productions nil :type integer :read-only t) ;Number of productions in the grammar
|
|
;The following fields are used for the parser.
|
|
(items-hash nil :type hash-table :read-only t) ;Hash table of (production . dot) -> item
|
|
(states nil :type list) ;List of LR(0) states (in order of state numbers)
|
|
;The following fields are used for the action generator.
|
|
(action-signatures nil :type (or null hash-table))) ;Hash table of grammar-symbol -> list of (action-symbol . type-or-type-expr)
|
|
|
|
|
|
; Return a rule for the given nonterminal lhs.
|
|
(defun grammar-rule (grammar lhs)
|
|
(or (gethash lhs (grammar-rules grammar))
|
|
(error "Can't find rule for ~S" lhs)))
|
|
|
|
|
|
; Return a list of general-rules that together form a partition of all productions whose lhs's
|
|
; are instances of the given lhs-general-nonterminal.
|
|
; The given lhs-general-nonterminal must have been given as the lhs for at least one general production
|
|
; source when constructing this grammar.
|
|
(defun grammar-general-rules (grammar lhs-general-nonterminal)
|
|
(if (generic-nonterminal? lhs-general-nonterminal)
|
|
(parameter-tree-general-rules (gethash (generic-nonterminal-symbol lhs-general-nonterminal) (grammar-parameter-trees grammar)))
|
|
(list (grammar-rule grammar lhs-general-nonterminal))))
|
|
|
|
|
|
; Return the start production for the grammar.
|
|
(defun grammar-start-production (grammar)
|
|
(assert-non-null (first (rule-productions (grammar-rule grammar *start-nonterminal*)))))
|
|
|
|
|
|
; Return the user (not internal) start-symbol of the grammar.
|
|
(defun gramar-user-start-symbol (grammar)
|
|
(assert-non-null (first (production-rhs (grammar-start-production grammar)))))
|
|
|
|
|
|
; Return the production with the given name in the grammar.
|
|
; The returned production may be a production or a generic-production.
|
|
; Signal an error if there is no such production.
|
|
(defun grammar-general-production (grammar production-name)
|
|
(or (gethash production-name (grammar-general-productions grammar))
|
|
(error "Production ~S not found" production-name)))
|
|
|
|
|
|
; Call f on each production in the grammar in an unspecified order.
|
|
(defun each-grammar-production (grammar f)
|
|
(maphash
|
|
#'(lambda (lhs rule)
|
|
(declare (ignore lhs))
|
|
(mapc f (rule-productions rule)))
|
|
(grammar-rules grammar)))
|
|
|
|
|
|
; Return the starting state for the grammar.
|
|
(defun grammar-start-state (grammar)
|
|
(assert-non-null (first (grammar-states grammar))))
|
|
|
|
|
|
; Return the state numbered n in the grammar.
|
|
(defun grammar-state (grammar n)
|
|
(assert-non-null (nth n (grammar-states grammar))))
|
|
|
|
|
|
; Return two values:
|
|
; a generate terminalset G;
|
|
; a passthrough terminalset P.
|
|
;
|
|
; G is the terminalset of all terminals x that satisfy
|
|
; symbol ==>* x rest,
|
|
; where rest is an arbitrary string of grammar symbols.
|
|
; P is the terminalset of all terminals x that satisfy
|
|
; symbol x ==> x
|
|
(defun symbol-initial-terminals (grammar symbol)
|
|
(assert-type symbol grammar-symbol)
|
|
(if (nonterminal? symbol)
|
|
(let ((rule (grammar-rule grammar symbol)))
|
|
(values (rule-initial-terminals rule) (rule-passthrough-terminals rule)))
|
|
(values (make-terminalset grammar symbol) *empty-terminalset*)))
|
|
|
|
|
|
; Given an arbitrary string of grammar symbols, a list of constraints, and an initial position,
|
|
; return two values:
|
|
; a generate terminalset G;
|
|
; a passthrough terminalset P.
|
|
;
|
|
; G is the terminalset of all terminals x that satisfy
|
|
; symbol-string ==>* x rest,
|
|
; where rest is an arbitrary string of grammar symbols.
|
|
; P is the terminalset of all terminals x that satisfy
|
|
; symbol-string x ==> x
|
|
;
|
|
; The constraints' positions are relative to the given initial position, which specifies the position
|
|
; of the first grammar-symbol in the symbol-string. The constraints must be listed in order of
|
|
; increasing positions.
|
|
(defun string-initial-terminals (grammar symbol-string constraints pos)
|
|
(do ((symbol-string symbol-string (cdr symbol-string))
|
|
(initial-terminals *empty-terminalset*)
|
|
(passthrough-terminals *full-terminalset*))
|
|
((terminalset-empty? passthrough-terminals) (values initial-terminals *empty-terminalset*))
|
|
(dolist (constraint constraints)
|
|
(when (and (lookahead-constraint? constraint)
|
|
(= (constraint-pos constraint) pos))
|
|
(terminalset-intersection-f passthrough-terminals (lookahead-constraint-terminalset constraint))))
|
|
(if symbol-string
|
|
(multiple-value-bind (generate passthrough) (symbol-initial-terminals grammar (first symbol-string))
|
|
(terminalset-union-f initial-terminals (terminalset-intersection passthrough-terminals generate))
|
|
(terminalset-intersection-f passthrough-terminals passthrough))
|
|
(return (values initial-terminals passthrough-terminals)))))
|
|
|
|
|
|
; Intern attributed or generic nonterminals in the production's lhs and rhs. Replace
|
|
; (:- <terminal> ... <terminal>) or (:-- (<grammar-symbol> ... <grammar-symbol>) <terminal> ... <terminal>)
|
|
; sublists in the rhs with lookahead-constraints and put these, in order, after the third element of
|
|
; the returned list. Also replace :~ symbols with no-line-break-constraints.
|
|
; Return the resulting production source.
|
|
(defun intern-production-source (grammar-parametrization production-source)
|
|
(assert-type production-source (tuple (or user-nonterminal cons) (list (or user-grammar-symbol cons)) identifier))
|
|
(let ((production-lhs-source (first production-source))
|
|
(production-rhs-source (second production-source))
|
|
(production-name (third production-source)))
|
|
(if (or (consp production-lhs-source) (some #'consp production-rhs-source) (find ':~ production-rhs-source))
|
|
(multiple-value-bind (lhs-nonterminal lhs-arguments) (grammar-parametrization-intern grammar-parametrization production-lhs-source)
|
|
(let ((rhs nil)
|
|
(constraints nil)
|
|
(pos 0))
|
|
(dolist (component-source production-rhs-source)
|
|
(cond
|
|
((and (consp component-source) (eq (first component-source) ':-))
|
|
(let ((lookaheads (rest component-source)))
|
|
(push
|
|
(make-lookahead-constraint pos (assert-non-null lookaheads) lookaheads)
|
|
constraints)))
|
|
((and (consp component-source) (eq (first component-source) ':--))
|
|
(let ((lookaheads (rest component-source)))
|
|
(push
|
|
(make-lookahead-constraint pos (assert-non-null (rest lookaheads)) (assert-non-null (first lookaheads)))
|
|
constraints)))
|
|
((eq component-source ':~)
|
|
(push (make-no-line-break-constraint pos) constraints))
|
|
(t
|
|
(push (grammar-parametrization-intern grammar-parametrization component-source lhs-arguments) rhs)
|
|
(incf pos))))
|
|
(list* lhs-nonterminal (nreverse rhs) production-name (nreverse constraints))))
|
|
production-source)))
|
|
|
|
|
|
; Make a grammar structure out of a grammar in source form.
|
|
; A grammar-source is a list of productions; each production is a list of:
|
|
; a nonterminal A (the lhs);
|
|
; a list of grammar symbols forming A's expansion (the rhs);
|
|
; a production name.
|
|
; Nonterminals in the lhs and rhs can be parametrized; in this case such a nonterminal
|
|
; is represented by a list whose first element is the name and the remaining elements are
|
|
; the arguments or attributes. Any nonterminal argument in the rhs must also be an argument
|
|
; of the lhs nonterminal. The lhs nonterminal must not have duplicate arguments. The lhs
|
|
; nonterminal can have attributes, thereby designating a specialization instead of a fully
|
|
; generic production.
|
|
;
|
|
; The rhs can also contain lookahead constraints of the form
|
|
; (:- <terminal> ... <terminal>)
|
|
; which indicate that the following terminal must not be one of the listed terminals. The form
|
|
; (:-- (<grammar-symbol> ... <grammar-symbol>) <terminal> ... <terminal>)
|
|
; does the same thing except that it prints the grammar-symbols instead of the terminals when
|
|
; the production is printed. The form
|
|
; :~
|
|
; indicates that a line break is not allowed at the current position between terminals (this
|
|
; constraint is currently ignored by the parser but does show up in printed grammars).
|
|
;
|
|
; excluded-nonterminals-source is a list of nonterminals not used in the grammar. Productions,
|
|
; including productions expanded from generic productions, that have one of these nonterminals
|
|
; on the lhs are ignored.
|
|
(defun make-grammar (grammar-parametrization start-symbol grammar-source &optional excluded-nonterminals-source)
|
|
(let ((interned-grammar-source
|
|
(mapcar #'(lambda (production-source)
|
|
(intern-production-source grammar-parametrization production-source))
|
|
grammar-source))
|
|
(rules (make-hash-table :test #'eq))
|
|
(terminals-hash (make-hash-table :test *grammar-symbol-=*))
|
|
(general-productions (make-hash-table :test #'equal))
|
|
(production-number 0)
|
|
(max-production-length 1)
|
|
(excluded-nonterminals-hash (make-hash-table :test *grammar-symbol-=*))
|
|
(lookahead-constraints nil))
|
|
|
|
;Set up excluded-nonterminals-hash. The values of the hash table are either :seen or :unseen
|
|
;depending on whether a production with the particular nonterminal has been seen yet.
|
|
(dolist (excluded-nonterminal-source excluded-nonterminals-source)
|
|
(setf (gethash (grammar-parametrization-intern grammar-parametrization excluded-nonterminal-source nil) excluded-nonterminals-hash)
|
|
:unseen))
|
|
|
|
;Create the starting production: *start-nonterminal* ==> start-symbol
|
|
(setf (gethash *start-nonterminal* rules)
|
|
(list (make-production *start-nonterminal* (list start-symbol) nil nil 1 0)))
|
|
|
|
;Create the rest of the productions.
|
|
(flet
|
|
((create-production (lhs rhs constraints name)
|
|
(let ((production (make-production lhs rhs constraints name (length rhs) (incf production-number))))
|
|
(push production (gethash lhs rules))
|
|
(dolist (rhs-terminal (production-terminals production))
|
|
(setf (gethash rhs-terminal terminals-hash) t))
|
|
production))
|
|
|
|
(nonterminal-excluded (nonterminal)
|
|
(and (gethash nonterminal excluded-nonterminals-hash)
|
|
(setf (gethash nonterminal excluded-nonterminals-hash) :seen))))
|
|
|
|
(dolist (production-source interned-grammar-source)
|
|
(let* ((production-lhs (first production-source))
|
|
(production-rhs (second production-source))
|
|
(production-name (third production-source))
|
|
(production-constraints (cdddr production-source))
|
|
(lhs-arguments (general-grammar-symbol-arguments production-lhs)))
|
|
(setq max-production-length (max max-production-length (length production-rhs)))
|
|
(when (gethash production-name general-productions)
|
|
(error "Duplicate production name ~S" production-name))
|
|
(if lhs-arguments
|
|
(let ((productions nil))
|
|
(grammar-parametrization-each-permutation
|
|
grammar-parametrization
|
|
#'(lambda (bound-argument-alist)
|
|
(let ((instantiated-lhs (instantiate-general-grammar-symbol bound-argument-alist production-lhs)))
|
|
(unless (nonterminal-excluded instantiated-lhs)
|
|
(push (create-production
|
|
instantiated-lhs
|
|
(mapcar #'(lambda (general-grammar-symbol)
|
|
(instantiate-general-grammar-symbol bound-argument-alist general-grammar-symbol))
|
|
production-rhs)
|
|
production-constraints
|
|
production-name)
|
|
productions))))
|
|
lhs-arguments)
|
|
(when productions
|
|
(setf (gethash production-name general-productions)
|
|
(make-generic-production production-lhs production-rhs production-constraints production-name (nreverse productions)))))
|
|
|
|
(unless (nonterminal-excluded production-lhs)
|
|
(setf (gethash production-name general-productions)
|
|
(create-production production-lhs production-rhs production-constraints production-name)))))))
|
|
|
|
|
|
;Change all values of the rules hash table to contain rule structures
|
|
;instead of mere lists of rules. Also check that all referenced nonterminals
|
|
;have been defined.
|
|
(maphash
|
|
#'(lambda (rule-lhs rule-productions)
|
|
(dolist (rule-production rule-productions)
|
|
(dolist (grammar-symbol (production-rhs rule-production))
|
|
(when (nonterminal? grammar-symbol)
|
|
(unless (gethash grammar-symbol rules)
|
|
(error "Nonterminal ~S used but not defined" grammar-symbol))))
|
|
(dolist (constraint (production-constraints rule-production))
|
|
(when (lookahead-constraint? constraint)
|
|
(push constraint lookahead-constraints)
|
|
(dolist (lookahead-terminal (lookahead-constraint-forbidden-terminals constraint))
|
|
(unless (gethash lookahead-terminal terminals-hash)
|
|
(warn "Lookahead terminal ~S not used in main grammar" lookahead-terminal)
|
|
(setf (gethash lookahead-terminal terminals-hash) t))))))
|
|
(setf (gethash rule-lhs rules)
|
|
(make-rule (nreverse rule-productions))))
|
|
rules)
|
|
|
|
;Check that all excluded nonterminals have been seen.
|
|
(maphash
|
|
#'(lambda (excluded-nonterminal seen)
|
|
(unless (eq seen :seen)
|
|
(warn "Nonterminal ~S declared excluded but not defined" excluded-nonterminal)))
|
|
excluded-nonterminals-hash)
|
|
|
|
(let ((nonterminals-list (depth-first-search
|
|
*grammar-symbol-=*
|
|
#'(lambda (nonterminal) (rule-nonterminals (gethash nonterminal rules)))
|
|
*start-nonterminal*)))
|
|
(when (/= (length nonterminals-list) (hash-table-count rules))
|
|
(let ((dead-nonterminals (set-difference (hash-table-keys rules) nonterminals-list :test *grammar-symbol-=*)))
|
|
(warn "The following nonterminals are not reachable from start: ~S" dead-nonterminals)
|
|
(setq nonterminals-list (nconc nonterminals-list dead-nonterminals))))
|
|
|
|
(let ((terminals (coerce (cons *end-marker* (sorted-hash-table-keys terminals-hash))
|
|
'simple-vector))
|
|
(nonterminals (coerce nonterminals-list 'simple-vector)))
|
|
(dotimes (n (length terminals))
|
|
(setf (gethash (svref terminals n) terminals-hash) n))
|
|
(dotimes (n (length nonterminals))
|
|
(setf (rule-number (gethash (svref nonterminals n) rules)) n))
|
|
(let ((grammar (allocate-grammar
|
|
:argument-attributes (grammar-parametrization-argument-attributes grammar-parametrization)
|
|
:terminals terminals
|
|
:nonterminals-list nonterminals-list
|
|
:nonterminals nonterminals
|
|
:terminal-numbers terminals-hash
|
|
:terminal-actions (make-hash-table :test *grammar-symbol-=*)
|
|
:rules rules
|
|
:parameter-trees (make-hash-table :test *grammar-symbol-=*)
|
|
:max-production-length max-production-length
|
|
:general-productions general-productions
|
|
:n-productions production-number
|
|
:items-hash (make-hash-table :test #'equal))))
|
|
|
|
;Compute the terminalsets in the lookahead-constraints.
|
|
(dolist (lookahead-constraint lookahead-constraints)
|
|
(let ((terminalset *full-terminalset*))
|
|
(dolist (forbidden-terminal (lookahead-constraint-forbidden-terminals lookahead-constraint))
|
|
(terminalset-difference-f terminalset (make-terminalset grammar forbidden-terminal)))
|
|
(setf (lookahead-constraint-terminalset lookahead-constraint) terminalset)))
|
|
|
|
;Compute the values of passthrough-terminals and initial-terminals in each rule.
|
|
(do ((changed t))
|
|
((not changed))
|
|
(setq changed nil)
|
|
(dolist (nonterminal (grammar-nonterminals-list grammar))
|
|
(let ((rule (grammar-rule grammar nonterminal))
|
|
(new-initial-terminals *empty-terminalset*)
|
|
(new-passthrough-terminals *empty-terminalset*))
|
|
(dolist (production (rule-productions rule))
|
|
(multiple-value-bind (production-initial-terminals production-passthrough-terminals)
|
|
(string-initial-terminals grammar (production-rhs production) (production-constraints production) 0)
|
|
(terminalset-union-f new-initial-terminals production-initial-terminals)
|
|
(terminalset-union-f new-passthrough-terminals production-passthrough-terminals)))
|
|
(unless (terminalset-= new-initial-terminals (rule-initial-terminals rule))
|
|
(assert-true (terminalset-<= (rule-initial-terminals rule) new-initial-terminals))
|
|
(setf (rule-initial-terminals rule) new-initial-terminals)
|
|
(setq changed t))
|
|
(unless (terminalset-= new-passthrough-terminals (rule-passthrough-terminals rule))
|
|
(assert-true (terminalset-<= (rule-passthrough-terminals rule) new-passthrough-terminals))
|
|
(setf (rule-passthrough-terminals rule) new-passthrough-terminals)
|
|
(setq changed t)))))
|
|
|
|
;Compute the parameter-trees entries.
|
|
(let ((parameter-trees (grammar-parameter-trees grammar)))
|
|
(dolist (production-source interned-grammar-source)
|
|
(let ((general-production (gethash (third production-source) general-productions)))
|
|
(let ((lhs-general-nonterminal (general-production-lhs general-production)))
|
|
(when (or (generic-nonterminal? lhs-general-nonterminal)
|
|
(attributed-nonterminal? lhs-general-nonterminal))
|
|
(let* ((lhs-symbol (general-grammar-symbol-symbol lhs-general-nonterminal))
|
|
(parameter-tree (gethash lhs-symbol parameter-trees)))
|
|
(if parameter-tree
|
|
(parameter-tree-add-production grammar parameter-tree general-production)
|
|
(setf (gethash lhs-symbol parameter-trees)
|
|
(make-parameter-tree grammar general-production)))))))))
|
|
grammar)))))
|
|
|
|
|
|
(defvar *name-print-column* 70)
|
|
|
|
; Print the grammar nicely.
|
|
(defun print-grammar (grammar &optional (stream t) &key (details t))
|
|
(labels
|
|
((print-production-number (n)
|
|
(format nil "P~D" n)))
|
|
(let ((production-number-width (length (print-production-number (grammar-n-productions grammar)))))
|
|
(pprint-logical-block (stream nil)
|
|
(format stream "Terminals: ~@_~<~@{~W ~:_~}~:>~:@_" (coerce (grammar-terminals grammar) 'list))
|
|
(when details
|
|
(format stream "Nonterminals: ~@_~<~@{~W ~:_~}~:>~:@_" (grammar-nonterminals-list grammar)))
|
|
|
|
;Print the rules.
|
|
(pprint-logical-block (stream (grammar-nonterminals-list grammar))
|
|
(pprint-exit-if-list-exhausted)
|
|
(format stream "Rules:")
|
|
(pprint-indent :block 2 stream)
|
|
(pprint-newline :mandatory stream)
|
|
(loop
|
|
(let* ((rule-lhs (pprint-pop))
|
|
(rule (grammar-rule grammar rule-lhs)))
|
|
(pprint-logical-block (stream nil)
|
|
(pprint-logical-block (stream (rule-productions rule) :prefix (format nil "~W " rule-lhs))
|
|
(pprint-exit-if-list-exhausted)
|
|
(format stream "-> ")
|
|
(loop
|
|
(let ((production (pprint-pop)))
|
|
(format stream "~@<~:[<epsilon> ~;~:*~{~W ~:_~}~] ~_~vT~vA [~W]~:>"
|
|
(general-production-rhs-components production) *name-print-column*
|
|
production-number-width (print-production-number (production-number production))
|
|
(production-name production)))
|
|
(pprint-exit-if-list-exhausted)
|
|
(format stream "~:@_ | ")))
|
|
(pprint-indent :block 2 stream)
|
|
(when details
|
|
(format stream "~:@_Initial terminals: ~@_~@<~:[~;<epsilon> ~:_~]~{~W ~:_~}~:>"
|
|
(not (terminalset-empty? (rule-passthrough-terminals rule)))
|
|
(terminalset-list grammar (rule-initial-terminals rule)))))
|
|
(pprint-newline :mandatory stream))
|
|
(pprint-exit-if-list-exhausted)
|
|
(pprint-newline :mandatory stream)))
|
|
|
|
;Print the states.
|
|
(pprint-newline :mandatory stream)
|
|
(pprint-logical-block (stream (grammar-states grammar))
|
|
(pprint-exit-if-list-exhausted)
|
|
(format stream "States:")
|
|
(pprint-indent :block 2 stream)
|
|
(pprint-newline :mandatory stream)
|
|
(loop
|
|
(print-state (pprint-pop) stream)
|
|
(pprint-newline :mandatory stream)
|
|
(pprint-exit-if-list-exhausted)
|
|
(pprint-newline :mandatory stream))))
|
|
(pprint-newline :mandatory stream))))
|
|
|
|
|
|
(defmethod print-object ((grammar grammar) stream)
|
|
(print-unreadable-object (grammar stream :identity t)
|
|
(write-string "grammar" stream)))
|
|
|
|
|
|
; Emit markup paragraphs for the grammar.
|
|
(defun depict-grammar (markup-stream grammar)
|
|
(depict-paragraph (markup-stream ':body-text)
|
|
(depict markup-stream "Start nonterminal: ")
|
|
(depict-general-nonterminal markup-stream (gramar-user-start-symbol grammar) :reference))
|
|
(dolist (nonterminal (grammar-nonterminals-list grammar))
|
|
(unless (grammar-symbol-= nonterminal *start-nonterminal*)
|
|
(depict-general-rule markup-stream (grammar-rule grammar nonterminal)))))
|
|
|
|
|
|
; Return a list of nontrivial sets of states with the same kernels.
|
|
(defun grammar-kernel-sets (grammar)
|
|
(let ((states-hash (make-hash-table :test #'equal))
|
|
(equivalences nil))
|
|
(dolist (state (grammar-states grammar))
|
|
(push state (gethash (state-kernel state) states-hash)))
|
|
(maphash #'(lambda (kernel states)
|
|
(declare (ignore kernel))
|
|
(unless (= (length states) 1)
|
|
(push (nreverse states) equivalences)))
|
|
states-hash)
|
|
(sort equivalences #'< :key #'(lambda (equivalence)
|
|
(state-number (first equivalence))))))
|
|
|
|
|
|
;;; ------------------------------------------------------------------------------------------------------
|
|
;;; YACC OUTPUT
|
|
|
|
|
|
; Print the nonterminal in a yacc-like form.
|
|
(defun yacc-print-nonterminal (nonterminal &optional (stream t))
|
|
(cond
|
|
((keywordp nonterminal)
|
|
(write-string (symbol-upper-mixed-case-name nonterminal) stream))
|
|
((attributed-nonterminal? nonterminal)
|
|
(write-string (symbol-upper-mixed-case-name (attributed-nonterminal-symbol nonterminal)) stream)
|
|
(dolist (attribute (attributed-nonterminal-attributes nonterminal))
|
|
(write-char #\_ stream)
|
|
(write-string (symbol-lower-mixed-case-name attribute) stream)))
|
|
(t (error "Bad nonterminal ~S" nonterminal))))
|
|
|
|
|
|
; Print the grammar-symbol in a yacc-like form.
|
|
(defun yacc-print-grammar-symbol (grammar-symbol &optional (stream t))
|
|
(cond
|
|
((nonterminal? grammar-symbol)
|
|
(yacc-print-nonterminal grammar-symbol stream))
|
|
((and grammar-symbol (symbolp grammar-symbol))
|
|
(let ((name (symbol-name grammar-symbol)))
|
|
(if (and (> (length name) 0) (char= (char name 0) #\$))
|
|
(write-string (string-upcase name :start 1) stream)
|
|
(format stream "\"~A\"" (string-downcase name)))))
|
|
(t (error "Don't know how to emit markup for terminal ~S" grammar-symbol))))
|
|
|
|
#|
|
|
; Print the grammar in a yacc-like form.
|
|
(defun yacc-print-grammar (grammar &optional (stream t))
|
|
|#
|