;;; The contents of this file are subject to the Netscape Public License ;;; Version 1.0 (the "NPL"); you may not use this file except in ;;; compliance with the NPL. You may obtain a copy of the NPL at ;;; http://www.mozilla.org/NPL/ ;;; ;;; Software distributed under the NPL is distributed on an "AS IS" basis, ;;; WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL ;;; for the specific language governing rights and limitations under the ;;; NPL. ;;; ;;; The Initial Developer of this code under the NPL is Netscape ;;; Communications Corporation. Portions created by Netscape are ;;; Copyright (C) 1998 Netscape Communications Corporation. All Rights ;;; Reserved. ;;; ;;; Finite-state machine generator ;;; ;;; Waldemar Horwat (waldemar@netscape.com) ;;; ;;; ------------------------------------------------------------------------------------------------------ ;;; METATRANSITION (defstruct (metatransition (:constructor make-metatransition (next-metastate pre-productions post-productions))) (next-metastate nil :read-only t) ;Next metastate to enter or nil if this is an accept transition (pre-productions nil :read-only t) ;List of productions reduced by this transition (in order from first to last) before the shift (post-productions nil :read-only t)) ;List of productions reduced by this transition (in order from first to last) after the shift ;;; ------------------------------------------------------------------------------------------------------ ;;; METASTATE ;;; A metastate is a list of states that represents a possible stack that the ;;; LALR(1) parser may encounter. (defstruct (metastate (:constructor make-metastate (stack number transitions))) (stack nil :type list :read-only t) ;List of states that comprises a possible stack (number nil :type integer :read-only t) ;Serial number of this metastate (transitions nil :type simple-vector :read-only t)) ;Array, indexed by terminal numbers, of either nil or metatransition structures (declaim (inline metastate-transition)) (defun metastate-transition (metastate terminal-number) (svref (metastate-transitions metastate) terminal-number)) (defun print-metastate (metastate metagrammar &optional (stream t)) (let ((grammar (metagrammar-grammar metagrammar))) (pprint-logical-block (stream nil) (format stream "M~D:~2I ~@_~<~@{S~D ~:_~}~:>~:@_" (metastate-number metastate) (nreverse (mapcar #'state-number (metastate-stack metastate)))) (let ((transitions (metastate-transitions metastate))) (dotimes (terminal-number (length transitions)) (let ((transition (svref transitions terminal-number)) (terminal (svref (grammar-terminals grammar) terminal-number))) (when transition (let ((next-metastate (metatransition-next-metastate transition))) (pprint-logical-block (stream nil) (format stream "~W ==> ~@_~:I~:[accept~;M~:*~D~] ~_" terminal (and next-metastate (metastate-number next-metastate))) (pprint-fill stream (mapcar #'production-name (metatransition-pre-productions transition))) (format stream " ~@_") (pprint-fill stream (mapcar #'production-name (metatransition-post-productions transition)))) (pprint-newline :mandatory stream))))))))) (defmethod print-object ((metastate metastate) stream) (print-unreadable-object (metastate stream) (format stream "metastate S~D" (metastate-number metastate)))) ;;; ------------------------------------------------------------------------------------------------------ ;;; METAGRAMMAR (defstruct (metagrammar (:constructor allocate-metagrammar)) (grammar nil :type grammar :read-only t) ;The grammar to which this metagrammar corresponds (metastates nil :type list :read-only t) ;List of metastates ordered by metastate numbers (start nil :type metastate :read-only t)) ;The start metastate (defun make-metagrammar (grammar) (let* ((terminals (grammar-terminals grammar)) (n-terminals (length terminals)) (metastates-hash (make-hash-table :test #'equal)) ;Hash table of (list of state) -> metastate (metastates nil) (metastate-number -1)) (labels (;Return the stack after applying the given reduction production. (apply-reduction-production (stack production) (let* ((stack (nthcdr (production-rhs-length production) stack)) (state (first stack)) (dst-state (assert-non-null (cdr (assoc (production-lhs production) (state-gotos state) :test *grammar-symbol-=*)))) (dst-stack (cons dst-state stack))) (if (member dst-state stack :test #'eq) (error "This grammar cannot be represented by a FSM. Stack: ~S" dst-stack) dst-stack))) (get-metatransition (stack terminal productions) (let* ((state (first stack)) (transition (cdr (assoc terminal (state-transitions state) :test *grammar-symbol-=*)))) (when transition (case (transition-kind transition) (:shift (multiple-value-bind (metastate forwarding-productions) (get-metastate (transition-state transition) stack t) (make-metatransition metastate (nreverse productions) forwarding-productions))) (:reduce (let ((production (transition-production transition))) (get-metatransition (apply-reduction-production stack production) terminal (cons production productions)))) (:accept (make-metatransition nil (nreverse productions) nil)) (t (error "Bad transition: ~S" transition)))))) ;Return the metastate corresponding to the state stack (stack-top . stack-rest). Construct a new ;metastate if necessary. ;If simplify is true and stack-top is a state for which every outgoing transition is the same ;reduction, return two values: ; the metastate reached by following that reduction (doing it recursively if needed) ; a list of reduction productions followed this way. (get-metastate (stack-top stack-rest simplify) (let* ((stack (cons stack-top stack-rest)) (existing-metastate (gethash stack metastates-hash))) (cond (existing-metastate (values existing-metastate nil)) ((member stack-top stack-rest :test #'eq) (error "This grammar cannot be represented by a FSM. Stack: ~S" stack)) (t (let ((forwarding-production (and simplify (forwarding-state-production stack-top)))) (if forwarding-production (let ((stack (apply-reduction-production stack forwarding-production))) (multiple-value-bind (metastate forwarding-productions) (get-metastate (car stack) (cdr stack) simplify) (values metastate (cons forwarding-production forwarding-productions)))) (let* ((transitions (make-array n-terminals :initial-element nil)) (metastate (make-metastate stack (incf metastate-number) transitions))) (setf (gethash stack metastates-hash) metastate) (push metastate metastates) (dotimes (n n-terminals) (setf (svref transitions n) (get-metatransition stack (svref terminals n) nil))) (values metastate nil))))))))) (let ((start-metastate (get-metastate (grammar-start-state grammar) nil nil))) (allocate-metagrammar :grammar grammar :metastates (nreverse metastates) :start start-metastate))))) ; Print the metagrammar nicely. (defun print-metagrammar (metagrammar &optional (stream t) &key (grammar t) (details t)) (pprint-logical-block (stream nil) (when grammar (print-grammar (metagrammar-grammar metagrammar) stream :details details)) ;Print the metastates. (format stream "Start metastate: ~@_M~D~:@_~:@_" (metastate-number (metagrammar-start metagrammar))) (pprint-logical-block (stream (metagrammar-metastates metagrammar)) (pprint-exit-if-list-exhausted) (format stream "Metastates:~2I~:@_") (loop (print-metastate (pprint-pop) metagrammar stream) (pprint-exit-if-list-exhausted) (pprint-newline :mandatory stream)))) (pprint-newline :mandatory stream)) (defmethod print-object ((metagrammar metagrammar) stream) (print-unreadable-object (metagrammar stream :identity t) (write-string "metagrammar" stream))) ; Find the longest possible prefix of the input list of tokens that is accepted by the ; grammar. Parse that prefix and return two values: ; the list of action results; ; the tail of the input list of tokens remaining to be parsed. ; Signal an error if no prefix of the input list is accepted by the grammar. ; ; token-terminal is a function that returns a terminal symbol when given an input token. ; If trace is: ; nil, don't print trace information ; :code, print trace information, including action code ; other print trace information (defun action-metaparse (metagrammar token-terminal input &key trace) (if trace (trace-action-metaparse metagrammar token-terminal input trace) (let ((grammar (metagrammar-grammar metagrammar))) (labels ((transition-value-stack (value-stack productions) (dolist (production productions) (setq value-stack (funcall (production-evaluator production) value-stack))) value-stack) (cut (input good-metastate good-input good-value-stack) (unless good-metastate (error "Parse error on ~S ..." (ldiff input (nthcdr 10 input)))) (let ((last-metatransition (metastate-transition good-metastate *end-marker-terminal-number*))) (assert-true (null (metatransition-next-metastate last-metatransition))) (assert-true (null (metatransition-post-productions last-metatransition))) (values (reverse (transition-value-stack good-value-stack (metatransition-pre-productions last-metatransition))) good-input)))) (do ((metastate (metagrammar-start metagrammar)) (input input (cdr input)) (value-stack nil) (last-good-metastate nil) last-good-input last-good-value-stack) (nil) (when (metastate-transition metastate *end-marker-terminal-number*) (setq last-good-metastate metastate) (setq last-good-input input) (setq last-good-value-stack value-stack)) (when (endp input) (return (cut input last-good-metastate last-good-input last-good-value-stack))) (let* ((token (first input)) (terminal (funcall token-terminal token)) (terminal-number (terminal-number grammar terminal)) (metatransition (metastate-transition metastate terminal-number))) (unless metatransition (return (cut input last-good-metastate last-good-input last-good-value-stack))) (setq value-stack (transition-value-stack value-stack (metatransition-pre-productions metatransition))) (dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar))) (push (funcall (cdr action-function-binding) token) value-stack)) (setq value-stack (transition-value-stack value-stack (metatransition-post-productions metatransition))) (setq metastate (metatransition-next-metastate metatransition)))))))) ; Same as action-parse, but with tracing information ; If trace is: ; :code, print trace information, including action code ; other print trace information (defun trace-action-metaparse (metagrammar token-terminal input trace) (let ((grammar (metagrammar-grammar metagrammar))) (labels ((print-stacks (value-stack type-stack) (write-string " " *trace-output*) (if value-stack (print-values (reverse value-stack) (reverse type-stack) *trace-output*) (write-string "empty" *trace-output*)) (pprint-newline :mandatory *trace-output*)) (transition-value-stack (value-stack type-stack productions) (dolist (production productions) (write-string " reduce " *trace-output*) (if (eq trace :code) (write production :stream *trace-output* :pretty t) (print-production production *trace-output*)) (pprint-newline :mandatory *trace-output*) (setq value-stack (funcall (production-evaluator production) value-stack)) (setq type-stack (nthcdr (production-n-action-args production) type-stack)) (dolist (action-signature (grammar-symbol-signature grammar (production-lhs production))) (push (cdr action-signature) type-stack)) (print-stacks value-stack type-stack)) (values value-stack type-stack)) (cut (input good-metastate good-input good-value-stack good-type-stack) (unless good-metastate (error "Parse error on ~S ..." (ldiff input (nthcdr 10 input)))) (let ((last-metatransition (metastate-transition good-metastate *end-marker-terminal-number*))) (assert-true (null (metatransition-next-metastate last-metatransition))) (assert-true (null (metatransition-post-productions last-metatransition))) (format *trace-output* "cut to M~D~:@_" (metastate-number good-metastate)) (print-stacks good-value-stack good-type-stack) (pprint-newline :mandatory *trace-output*) (values (reverse (transition-value-stack good-value-stack good-type-stack (metatransition-pre-productions last-metatransition))) good-input)))) (do ((metastate (metagrammar-start metagrammar)) (input input (cdr input)) (value-stack nil) (type-stack nil) (last-good-metastate nil) last-good-input last-good-value-stack last-good-type-stack) (nil) (format *trace-output* "M~D" (metastate-number metastate)) (when (metastate-transition metastate *end-marker-terminal-number*) (write-string " (good)" *trace-output*) (setq last-good-metastate metastate) (setq last-good-input input) (setq last-good-value-stack value-stack) (setq last-good-type-stack type-stack)) (write-string ": " *trace-output*) (when (endp input) (return (cut input last-good-metastate last-good-input last-good-value-stack last-good-type-stack))) (let* ((token (first input)) (terminal (funcall token-terminal token)) (terminal-number (terminal-number grammar terminal)) (metatransition (metastate-transition metastate terminal-number))) (unless metatransition (format *trace-output* "shift ~W: " terminal) (return (cut input last-good-metastate last-good-input last-good-value-stack last-good-type-stack))) (format *trace-output* "transition to M~D~:@_" (metastate-number (metatransition-next-metastate metatransition))) (multiple-value-setq (value-stack type-stack) (transition-value-stack value-stack type-stack (metatransition-pre-productions metatransition))) (dolist (action-function-binding (gethash terminal (grammar-terminal-actions grammar))) (push (funcall (cdr action-function-binding) token) value-stack)) (dolist (action-signature (grammar-symbol-signature grammar terminal)) (push (cdr action-signature) type-stack)) (format *trace-output* "shift ~W~:@_" terminal) (print-stacks value-stack type-stack) (multiple-value-setq (value-stack type-stack) (transition-value-stack value-stack type-stack (metatransition-post-productions metatransition))) (setq metastate (metatransition-next-metastate metatransition))))))) ; Compute all representative strings of terminals such that, for each such string S: ; S is rejected by the grammar's language; ; all prefixes of S are also rejected by the grammar's language; ; for any S and all strings of terminals T, the concatenated string ST is also ; rejected by the grammar's language; ; no string S1 is a prefix of (or equal to) another string S2. ; Often there are infinitely many such strings S, so only output one for each illegal ; metaparser transition. ; Return a list of S's, where each S is itself a list of terminals. (defun compute-illegal-strings (metagrammar) (let* ((grammar (metagrammar-grammar metagrammar)) (terminals (grammar-terminals grammar)) (n-terminals (length terminals)) (metastates (metagrammar-metastates metagrammar)) (n-metastates (length metastates)) (visited-metastates (make-array n-metastates :element-type 'bit :initial-element 0)) (illegal-strings nil)) (labels ((visit (metastate reversed-string) (let ((metastate-number (metastate-number metastate))) (when (= (sbit visited-metastates metastate-number) 0) (setf (sbit visited-metastates metastate-number) 1) (let ((metatransitions (metastate-transitions metastate))) ;If there is a transition for the end marker from this state, then string ;is accepted by the language, so cut off the search. (unless (svref metatransitions *end-marker-terminal-number*) (dotimes (terminal-number n-terminals) (unless (= terminal-number *end-marker-terminal-number*) (let ((metatransition (svref metatransitions terminal-number)) (reversed-string (cons (svref terminals terminal-number) reversed-string))) (if metatransition (visit (metatransition-next-metastate metatransition) reversed-string) (push (reverse reversed-string) illegal-strings))))))))))) (visit (metagrammar-start metagrammar) nil) (nreverse illegal-strings)))) ; Compute and print illegal strings of terminals. See compute-illegal-strings. (defun print-illegal-strings (metagrammar &optional (stream t)) (pprint-logical-block (stream (compute-illegal-strings metagrammar)) (format stream "Illegal strings:~2I") (loop (pprint-exit-if-list-exhausted) (pprint-newline :mandatory stream) (pprint-fill stream (pprint-pop)))) (pprint-newline :mandatory stream))