diff --git a/SPECS/tidy/CVE-2021-33391.patch b/SPECS/tidy/CVE-2021-33391.patch
new file mode 100644
index 0000000000..e36af526e0
--- /dev/null
+++ b/SPECS/tidy/CVE-2021-33391.patch
@@ -0,0 +1,1108 @@
+diff --git a/src/gdoc.c b/src/gdoc.c
+index 50cd9bc..3786746 100644
+--- a/src/gdoc.c
++++ b/src/gdoc.c
+@@ -96,14 +96,15 @@ static void DiscardContainer( TidyDocImpl* doc, Node *element, Node **pnode)
+ 
+ static void CleanNode( TidyDocImpl* doc, Node *node )
+ {
++    Stack *stack = TY_(newStack)(doc, 16);
+     Node *child, *next;
+ 
+-    if (node->content)
++    if ( (child = node->content) )
+     {
+-        for (child = node->content; child != NULL; child = next)
++        while (child)
+         {
+             next = child->next;
+-
++            
+             if (TY_(nodeIsElement)(child))
+             {
+                 if (nodeIsSTYLE(child))
+@@ -131,11 +132,16 @@ static void CleanNode( TidyDocImpl* doc, Node *node )
+                     if (child->attributes)
+                         TY_(DropAttrByName)( doc, child, "class" );
+ 
+-                    CleanNode(doc, child);
++                    TY_(push)(stack,next);
++                    child = child->content;
++                    continue;
+                 }
+             }
++            child = next ? next : TY_(pop)(stack);
+         }
+     }
++    TY_(freeStack)(stack);
++    TidyFree(doc->allocator, stack);
+ }
+ 
+ /* insert meta element to force browser to recognize doc as UTF8 */
+diff --git a/src/lexer.c b/src/lexer.c
+index bc4e50a..d1cae84 100644
+--- a/src/lexer.c
++++ b/src/lexer.c
+@@ -877,15 +877,6 @@ static tmbchar LastChar( tmbstr str )
+     return 0;
+ }
+ 
+-/*
+-   node->type is one of these:
+-
+-    #define TextNode    1
+-    #define StartTag    2
+-    #define EndTag      3
+-    #define StartEndTag 4
+-*/
+-
+ Lexer* TY_(NewLexer)( TidyDocImpl* doc )
+ {
+     Lexer* lexer = (Lexer*) TidyDocAlloc( doc, sizeof(Lexer) );
+@@ -1545,13 +1536,7 @@ void TY_(FreeNode)( TidyDocImpl* doc, Node *node )
+         }
+     }
+ #endif
+-    /* this is no good ;=((
+-    if (node && doc && doc->lexer) {
+-        if (node == doc->lexer->token) {
+-            doc->lexer->token = NULL; // TY_(NewNode)( doc->lexer->allocator, doc->lexer );
+-        }
+-    }
+-      ----------------- */
++
+     while ( node )
+     {
+         Node* next = node->next;
+@@ -4462,11 +4447,102 @@ static Node *ParseDocTypeDecl(TidyDocImpl* doc)
+     return NULL;
+ }
+ 
+-/*
+- * local variables:
+- * mode: c
+- * indent-tabs-mode: nil
+- * c-basic-offset: 4
+- * eval: (c-set-offset 'substatement-open 0)
+- * end:
++
++/****************************************************************************//*
++ ** MARK: - Node Stack
++ ***************************************************************************/
++
++
++/**
++ * Create an empty stack with room for `capacity` node pointers, using the
++ * document's allocator. If memory allocation fails, then the allocator
++ * will panic the program automatically.
++ */
++Stack* TY_(newStack)(TidyDocImpl *doc, uint capacity)
++{
++    Stack *created = (Stack *)TidyAlloc(doc->allocator, sizeof(*created));
++    created->allocator = doc->allocator;
++    created->capacity = capacity;
++    created->top = -1;                  /* -1 is the empty marker */
++    created->firstNode = (Node **)TidyAlloc(doc->allocator, capacity * sizeof(Node*));
++    return created;
++}
++ 
++
++/**
++ *  Double the stack's capacity, keeping the nodes already stored. push()
++ *  calls this when the stack is full. If memory allocation fails, then
++ *  the allocator will panic the program automatically.
++ */
++void TY_(growStack)(Stack *stack)
++{
++    /* Double the capacity; a zero-capacity stack grows to one slot. */
++    uint new_capacity = stack->capacity ? stack->capacity * 2 : 1;
++    /* TidyAlloc takes a size in bytes, so scale by the element size. */
++    Node **firstNode = (Node **)TidyAlloc(stack->allocator, new_capacity * sizeof(Node*));
++
++    memcpy( firstNode, stack->firstNode, sizeof(Node*) * (stack->top + 1) );
++    TidyFree(stack->allocator, stack->firstNode);
++    stack->firstNode = firstNode;
++    stack->capacity = new_capacity;
++}
++
++
++/**
++ * Report whether every allocated slot is in use (top is the last index).
++ */
++Bool TY_(stackFull)(Stack *stack)
++{
++    return (unsigned)stack->top + 1u == stack->capacity;
++}
++
++
++/**
++ * Report whether the stack holds no nodes, i.e. top still has the value -1.
++ */
++Bool TY_(stackEmpty)(Stack *stack)
++{
++    return -1 == stack->top;
++}
++ 
++
++/**
++ * Push a node onto the stack, growing it first if full. NULL is ignored.
++ */
++void TY_(push)(Stack *stack, Node *node)
++{
++    if (!node)
++        return;     /* NULL is never stored, so don't grow the stack for it */
++    if (TY_(stackFull)(stack))
++        TY_(growStack)(stack);
++    stack->firstNode[++stack->top] = node;
++}
++
++
++/**
++ * Remove and return the most recently pushed node, or NULL if none is left.
++ */
++Node* TY_(pop)(Stack *stack)
++{
++    return (stack->top == -1) ? NULL : stack->firstNode[stack->top--];
++}
++
++
++/**
++ * Peek at the stack.
+  */
++FUNC_UNUSED Node* TY_(peek)(Stack *stack)
++{
++    return TY_(stackEmpty)(stack) ? NULL : stack->firstNode[stack->top]; /* read only: top is not moved */
++}
++
++/**
++ *  Release the node array and reset the stack; the Stack struct is not freed.
++ */
++void TY_(freeStack)(Stack *stack)
++{
++    TidyFree( stack->allocator, stack->firstNode );
++    stack->firstNode = NULL;            /* array is gone; don't leave it dangling */
++    stack->allocator = NULL;
++    stack->capacity = 0;
++    stack->top = -1;                    /* back to the empty state */
++}
+diff --git a/src/lexer.h b/src/lexer.h
+index c181d4b..d9ae113 100644
+--- a/src/lexer.h
++++ b/src/lexer.h
+@@ -1,33 +1,46 @@
+ #ifndef __LEXER_H__
+ #define __LEXER_H__
+ 
+-/* lexer.h -- Lexer for html parser
+-  
+-   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
+-   See tidy.h for the copyright notice.
+ 
+-  Given an input source, it returns a sequence of tokens.
+-
+-     GetToken(source) gets the next token
+-     UngetToken(source) provides one level undo
+-
+-  The tags include an attribute list:
+-
+-    - linked list of attribute/value nodes
+-    - each node has 2 NULL-terminated strings.
+-    - entities are replaced in attribute values
+-
+-  white space is compacted if not in preformatted mode
+-  If not in preformatted mode then leading white space
+-  is discarded and subsequent white space sequences
+-  compacted to single space characters.
+-
+-  If XmlTags is no then Tag names are folded to upper
+-  case and attribute names to lower case.
+-
+- Not yet done:
+-    -   Doctype subset and marked sections
+-*/
++/**************************************************************************//**
++ * @file
++ * Lexer for HTML and XML Parsers.
++ *
++ *   Given an input source, it returns a sequence of tokens.
++ *
++ *      GetToken(source) gets the next token
++ *      UngetToken(source) provides one level undo
++ *
++ *   The tags include an attribute list:
++ *
++ *     - linked list of attribute/value nodes
++ *     - each node has 2 NULL-terminated strings.
++ *     - entities are replaced in attribute values
++ *
++ *   white space is compacted if not in preformatted mode
++ *   If not in preformatted mode then leading white space
++ *   is discarded and subsequent white space sequences
++ *   compacted to single space characters.
++ *
++ *   If XmlTags is no then Tag names are folded to upper
++ *   case and attribute names to lower case.
++ *
++ *  Not yet done:
++ *     - Doctype subset and marked sections
++ *
++ * @author  HTACG, et al (consult git log)
++ *
++ * @copyright
++ *     (c) 1998-2021 (W3C) MIT, ERCIM, Keio University, and HTACG.
++ *     See tidy.h for the copyright notice.
++ * @par
++ *     All Rights Reserved.
++ * @par
++ *     See `tidy.h` for the complete license.
++ *
++ * @date Additional updates: consult git log
++ *
++ ******************************************************************************/
+ 
+ #ifdef __cplusplus
+ extern "C" {
+@@ -35,8 +48,23 @@ extern "C" {
+ 
+ #include "forward.h"
+ 
+-/* lexer character types
+-*/
++/** @addtogroup internal_api */
++/** @{ */
++
++
++/***************************************************************************//**
++ ** @defgroup lexer_h HTML and XML Lexing
++ **
++ ** These functions and structures form the internal API for document
++ ** lexing.
++ **
++ ** @{
++ ******************************************************************************/
++
++
++/**
++ *  Lexer character types.
++ */
+ #define digit       1u
+ #define letter      2u
+ #define namechar    4u
+@@ -47,8 +75,9 @@ extern "C" {
+ #define digithex    128u
+ 
+ 
+-/* node->type is one of these values
+-*/
++/**
++ *  node->type is one of these values
++ */
+ typedef enum
+ {
+   RootNode,
+@@ -68,9 +97,9 @@ typedef enum
+ } NodeType;
+ 
+ 
+-
+-/* lexer GetToken states
+-*/
++/**
++ *  Lexer GetToken() states.
++ */
+ typedef enum
+ {
+   LEX_CONTENT,
+@@ -88,7 +117,10 @@ typedef enum
+   LEX_XMLDECL
+ } LexerState;
+ 
+-/* ParseDocTypeDecl state constants */
++
++/**
++ *  ParseDocTypeDecl state constants.
++ */
+ typedef enum
+ {
+   DT_INTERMEDIATE,
+@@ -98,67 +130,43 @@ typedef enum
+   DT_INTSUBSET
+ } ParseDocTypeDeclState;
+ 
+-/* content model shortcut encoding
+-
+-   Descriptions are tentative.
+-*/
++/**
++ *  Content model shortcut encoding.
++ *  Descriptions are tentative.
++ */
+ #define CM_UNKNOWN      0
+-/* Elements with no content. Map to HTML specification. */
+-#define CM_EMPTY        (1 << 0)
+-/* Elements that appear outside of "BODY". */
+-#define CM_HTML         (1 << 1)
+-/* Elements that can appear within HEAD. */
+-#define CM_HEAD         (1 << 2)
+-/* HTML "block" elements. */
+-#define CM_BLOCK        (1 << 3)
+-/* HTML "inline" elements. */
+-#define CM_INLINE       (1 << 4)
+-/* Elements that mark list item ("LI"). */
+-#define CM_LIST         (1 << 5)
+-/* Elements that mark definition list item ("DL", "DT"). */
+-#define CM_DEFLIST      (1 << 6)
+-/* Elements that can appear inside TABLE. */
+-#define CM_TABLE        (1 << 7)
+-/* Used for "THEAD", "TFOOT" or "TBODY". */
+-#define CM_ROWGRP       (1 << 8)
+-/* Used for "TD", "TH" */
+-#define CM_ROW          (1 << 9)
+-/* Elements whose content must be protected against white space movement.
+-   Includes some elements that can found in forms. */
+-#define CM_FIELD        (1 << 10)
+-/* Used to avoid propagating inline emphasis inside some elements
+-   such as OBJECT or APPLET. */
+-#define CM_OBJECT       (1 << 11)
+-/* Elements that allows "PARAM". */
+-#define CM_PARAM        (1 << 12)
+-/* "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
+-#define CM_FRAMES       (1 << 13)
+-/* Heading elements (h1, h2, ...). */
+-#define CM_HEADING      (1 << 14)
+-/* Elements with an optional end tag. */
+-#define CM_OPT          (1 << 15)
+-/* Elements that use "align" attribute for vertical position. */
+-#define CM_IMG          (1 << 16)
+-/* Elements with inline and block model. Used to avoid calling InlineDup. */
+-#define CM_MIXED        (1 << 17)
+-/* Elements whose content needs to be indented only if containing one 
+-   CM_BLOCK element. */
+-#define CM_NO_INDENT    (1 << 18)
+-/* Elements that are obsolete (such as "dir", "menu"). */
+-#define CM_OBSOLETE     (1 << 19)
+-/* User defined elements. Used to determine how attributes wihout value
+-   should be printed. */
+-#define CM_NEW          (1 << 20)
+-/* Elements that cannot be omitted. */
+-#define CM_OMITST       (1 << 21)
+-
+-/* If the document uses just HTML 2.0 tags and attributes described
+-** it as HTML 2.0 Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
+-** If there are proprietary tags and attributes then describe it as
+-** HTML Proprietary. If it includes the xml-lang or xmlns attributes
+-** but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
+-** flavors of Voyager (strict, loose or frameset).
+-*/
++#define CM_EMPTY        (1 << 0)   /**< Elements with no content. Map to HTML specification. */
++#define CM_HTML         (1 << 1)   /**< Elements that appear outside of "BODY". */
++#define CM_HEAD         (1 << 2)   /**< Elements that can appear within HEAD. */
++#define CM_BLOCK        (1 << 3)   /**< HTML "block" elements. */
++#define CM_INLINE       (1 << 4)   /**< HTML "inline" elements. */
++#define CM_LIST         (1 << 5)   /**< Elements that mark list item ("LI"). */
++#define CM_DEFLIST      (1 << 6)   /**< Elements that mark definition list item ("DL", "DT"). */
++#define CM_TABLE        (1 << 7)   /**< Elements that can appear inside TABLE. */
++#define CM_ROWGRP       (1 << 8)   /**< Used for "THEAD", "TFOOT" or "TBODY". */
++#define CM_ROW          (1 << 9)   /**< Used for "TD", "TH" */
++#define CM_FIELD        (1 << 10)  /**< Elements whose content must be protected against white space movement. Includes some elements that can be found in forms. */
++#define CM_OBJECT       (1 << 11)  /**< Used to avoid propagating inline emphasis inside some elements such as OBJECT or APPLET. */
++#define CM_PARAM        (1 << 12)  /**< Elements that allow "PARAM". */
++#define CM_FRAMES       (1 << 13)  /**< "FRAME", "FRAMESET", "NOFRAMES". Used in ParseFrameSet. */
++#define CM_HEADING      (1 << 14)  /**< Heading elements (h1, h2, ...). */
++#define CM_OPT          (1 << 15)  /**< Elements with an optional end tag. */
++#define CM_IMG          (1 << 16)  /**< Elements that use "align" attribute for vertical position. */
++#define CM_MIXED        (1 << 17)  /**< Elements with inline and block model. Used to avoid calling InlineDup. */
++#define CM_NO_INDENT    (1 << 18)  /**< Elements whose content needs to be indented only if containing one CM_BLOCK element. */
++#define CM_OBSOLETE     (1 << 19)  /**< Elements that are obsolete (such as "dir", "menu"). */
++#define CM_NEW          (1 << 20)  /**< User defined elements. Used to determine how attributes without value should be printed. */
++#define CM_OMITST       (1 << 21)   /**< Elements that cannot be omitted. */
++
++
++/**
++ *  If the document uses just HTML 2.0 tags and attributes then describe
++ *  it as HTML 2.0. Similarly for HTML 3.2 and the 3 flavors of HTML 4.0.
++ *  If there are proprietary tags and attributes then describe it as
++ *  HTML Proprietary. If it includes the xml-lang or xmlns attributes
++ *  but is otherwise HTML 2.0, 3.2 or 4.0 then describe it as one of the
++ *  flavors of Voyager (strict, loose or frameset).
++ */
+ 
+ /* unknown */
+ #define xxxx                   0u
+@@ -220,8 +228,10 @@ typedef enum
+ /* all proprietary types */
+ #define VERS_PROPRIETARY   (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
+ 
+-/* Linked list of class names and styles
+-*/
++
++/**
++ *  Linked list of class names and styles
++ */
+ struct _Style;
+ typedef struct _Style TagStyle;
+ 
+@@ -234,8 +244,9 @@ struct _Style
+ };
+ 
+ 
+-/* Linked list of style properties
+-*/
++/**
++ *  Linked list of style properties
++ */
+ struct _StyleProp;
+ typedef struct _StyleProp StyleProp;
+ 
+@@ -247,11 +258,9 @@ struct _StyleProp
+ };
+ 
+ 
+-
+-
+-/* Attribute/Value linked list node
+-*/
+-
++/**
++ *  Attribute/Value linked list node
++ */
+ struct _AttVal
+ {
+     AttVal*           next;
+@@ -264,93 +273,89 @@ struct _AttVal
+ };
+ 
+ 
+-
+-/*
+-  Mosaic handles inlines via a separate stack from other elements
+-  We duplicate this to recover from inline markup errors such as:
+-
+-     <i>italic text
+-     <p>more italic text</b> normal text
+-
+-  which for compatibility with Mosaic is mapped to:
+-
+-     <i>italic text</i>
+-     <p><i>more italic text</i> normal text
+-
+-  Note that any inline end tag pop's the effect of the current
+-  inline start tag, so that </b> pop's <i> in the above example.
++/**
++ *  Mosaic handles inlines via a separate stack from other elements
++ *  We duplicate this to recover from inline markup errors such as:
++ *     ~~~
++ *     <i>italic text
++ *     <p>more italic text</b> normal text
++ *     ~~~
++ *  which for compatibility with Mosaic is mapped to:
++ *     ~~~
++ *     <i>italic text</i>
++ *     <p><i>more italic text</i> normal text
++ *     ~~~
++ *  Note that any inline end tag pops the effect of the current
++ *  inline start tag, so that `</b>` pops `<i>` in the above example.
+ */
+ struct _IStack
+ {
+     IStack*     next;
+-    const Dict* tag;        /* tag's dictionary definition */
+-    tmbstr      element;    /* name (NULL for text nodes) */
++    const Dict* tag;        /**< tag's dictionary definition */
++    tmbstr      element;    /**< name (NULL for text nodes) */
+     AttVal*     attributes;
+ };
+ 
+ 
+-/* HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl,
+-** etc. etc.
+-*/
+-
++/**
++ *  HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc.
++ */
+ struct _Node
+ {
+-    Node*       parent;         /* tree structure */
++    Node*       parent;         /**< tree structure */
+     Node*       prev;
+     Node*       next;
+     Node*       content;
+     Node*       last;
+ 
+     AttVal*     attributes;
+-    const Dict* was;            /* old tag when it was changed */
+-    const Dict* tag;            /* tag's dictionary definition */
++    const Dict* was;            /**< old tag when it was changed */
++    const Dict* tag;            /**< tag's dictionary definition */
+ 
+-    tmbstr      element;        /* name (NULL for text nodes) */
++    tmbstr      element;        /**< name (NULL for text nodes) */
+ 
+-    uint        start;          /* start of span onto text array */
+-    uint        end;            /* end of span onto text array */
+-    NodeType    type;           /* TextNode, StartTag, EndTag etc. */
++    uint        start;          /**< start of span onto text array */
++    uint        end;            /**< end of span onto text array */
++    NodeType    type;           /**< TextNode, StartTag, EndTag etc. */
+ 
+-    uint        line;           /* current line of document */
+-    uint        column;         /* current column of document */
++    uint        line;           /**< current line of document */
++    uint        column;         /**< current column of document */
+ 
+-    Bool        closed;         /* true if closed by explicit end tag */
+-    Bool        implicit;       /* true if inferred */
+-    Bool        linebreak;      /* true if followed by a line break */
++    Bool        closed;         /**< true if closed by explicit end tag */
++    Bool        implicit;       /**< true if inferred */
++    Bool        linebreak;      /**< true if followed by a line break */
+ };
+ 
+ 
+-/*
+-  The following are private to the lexer
+-  Use NewLexer() to create a lexer, and
+-  FreeLexer() to free it.
+-*/
+-
++/**
++ *  The following are private to the lexer.
++ *  Use `NewLexer()` to create a lexer, and `FreeLexer()` to free it.
++ */
+ struct _Lexer
+ {
+-    uint lines;             /* lines seen */
+-    uint columns;           /* at start of current token */
+-    Bool waswhite;          /* used to collapse contiguous white space */
+-    Bool pushed;            /* true after token has been pushed back */
+-    Bool insertspace;       /* when space is moved after end tag */
+-    Bool excludeBlocks;     /* Netscape compatibility */
+-    Bool exiled;            /* true if moved out of table */
+-    Bool isvoyager;         /* true if xmlns attribute on html element */
+-    uint versions;          /* bit vector of HTML versions */
+-    uint doctype;           /* version as given by doctype (if any) */
+-    uint versionEmitted;    /* version of doctype emitted */
+-    Bool bad_doctype;       /* e.g. if html or PUBLIC is missing */
+-    uint txtstart;          /* start of current node */
+-    uint txtend;            /* end of current node */
+-    LexerState state;       /* state of lexer's finite state machine */
+-
+-    Node* token;            /* last token returned by GetToken() */
+-    Node* itoken;           /* last duplicate inline returned by GetToken() */
+-    Node* root;             /* remember root node of the document */
+-    Node* parent;           /* remember parent node for CDATA elements */
+-    
+-    Bool seenEndBody;       /* true if a </body> tag has been encountered */
+-    Bool seenEndHtml;       /* true if a </html> tag has been encountered */
++    uint lines;                /**< lines seen */
++    uint columns;              /**< at start of current token */
++    Bool waswhite;             /**< used to collapse contiguous white space */
++    Bool pushed;               /**< true after token has been pushed back */
++    Bool insertspace;          /**< when space is moved after end tag */
++    Bool excludeBlocks;        /**< Netscape compatibility */
++    Bool exiled;               /**< true if moved out of table */
++    Bool isvoyager;            /**< true if xmlns attribute on html element (i.e., "Voyager" was the W3C codename for XHTML). */
++    uint versions;             /**< bit vector of HTML versions */
++    uint doctype;              /**< version as given by doctype (if any) */
++    uint versionEmitted;       /**< version of doctype emitted */
++    Bool bad_doctype;          /**< e.g. if html or PUBLIC is missing */
++    uint txtstart;             /**< start of current node */
++    uint txtend;               /**< end of current node */
++    LexerState state;          /**< state of lexer's finite state machine */
++
++    Node* token;               /**< last token returned by GetToken() */
++    Node* itoken;              /**< last duplicate inline returned by GetToken() */
++    Node* root;                /**< remember root node of the document */
++    Node* parent;              /**< remember parent node for CDATA elements */
++
++    Bool seenEndBody;          /**< true if a `</body>` tag has been encountered */
++    Bool seenEndHtml;          /**< true if a `</html>` tag has been encountered */
+ 
+     /*
+       Lexer character buffer
+@@ -361,33 +366,57 @@ struct _Lexer
+ 
+       lexsize must be reset for each file.
+     */
+-    tmbstr lexbuf;          /* MB character buffer */
+-    uint lexlength;         /* allocated */
+-    uint lexsize;           /* used */
++    tmbstr lexbuf;             /**< MB character buffer */
++    uint lexlength;            /**< allocated */
++    uint lexsize;              /**< used */
+ 
+     /* Inline stack for compatibility with Mosaic */
+-    Node* inode;            /* for deferring text node */
+-    IStack* insert;         /* for inferring inline tags */
++    Node* inode;               /**< for deferring text node */
++    IStack* insert;            /**< for inferring inline tags */
+     IStack* istack;
+-    uint istacklength;      /* allocated */
+-    uint istacksize;        /* used */
+-    uint istackbase;        /* start of frame */
++    uint istacklength;         /**< allocated */
++    uint istacksize;           /**< used */
++    uint istackbase;           /**< start of frame */
+ 
+-    TagStyle *styles;          /* used for cleaning up presentation markup */
++    TagStyle *styles;          /**< used for cleaning up presentation markup */
+ 
+-    TidyAllocator* allocator; /* allocator */
++    TidyAllocator* allocator;  /**< allocator */
+ };
+ 
+ 
+-/* Lexer Functions
+-*/
++/**
++ *  modes for GetToken()
++ *
++ *  MixedContent   -- for elements which don't accept PCDATA
++ *  Preformatted   -- white space preserved as is
++ *  IgnoreMarkup   -- for CDATA elements such as script, style
++ */
++typedef enum
++{
++  IgnoreWhitespace,
++  MixedContent,     /**< for elements which don't accept PCDATA */
++  Preformatted,     /**< white space preserved as is */
++  IgnoreMarkup,     /**< for CDATA elements such as script, style */
++  OtherNamespace,
++  CdataContent
++} GetTokenMode;
+ 
+-/* choose what version to use for new doctype */
++
++/** @name Lexer Functions
++ *  @{
++ */
++
++
++/**
++ *  Choose what version to use for new doctype
++ */
+ TY_PRIVATE int TY_(HTMLVersion)( TidyDocImpl* doc );
+ 
+-/* everything is allowed in proprietary version of HTML */
+-/* this is handled here rather than in the tag/attr dicts */
+ 
++/**
++ *  Everything is allowed in proprietary version of HTML.
++ *  This is handled here rather than in the tag/attr dicts
++ */
+ TY_PRIVATE void TY_(ConstrainVersion)( TidyDocImpl* doc, uint vers );
+ 
+ TY_PRIVATE Bool TY_(IsWhite)(uint c);
+@@ -399,7 +428,6 @@ TY_PRIVATE Bool TY_(IsNamechar)(uint c);
+ TY_PRIVATE Bool TY_(IsXMLLetter)(uint c);
+ TY_PRIVATE Bool TY_(IsXMLNamechar)(uint c);
+ 
+-/* Bool IsLower(uint c); */
+ TY_PRIVATE Bool TY_(IsUpper)(uint c);
+ TY_PRIVATE uint TY_(ToLower)(uint c);
+ TY_PRIVATE uint TY_(ToUpper)(uint c);
+@@ -407,60 +435,82 @@ TY_PRIVATE uint TY_(ToUpper)(uint c);
+ TY_PRIVATE Lexer* TY_(NewLexer)( TidyDocImpl* doc );
+ TY_PRIVATE void TY_(FreeLexer)( TidyDocImpl* doc );
+ 
+-/* store character c as UTF-8 encoded byte stream */
++
++/**
++ *  Store character c as UTF-8 encoded byte stream
++ */
+ TY_PRIVATE void TY_(AddCharToLexer)( Lexer *lexer, uint c );
+ 
+-/*
+-  Used for elements and text nodes
+-  element name is NULL for text nodes
+-  start and end are offsets into lexbuf
+-  which contains the textual content of
+-  all elements in the parse tree.
+-
+-  parent and content allow traversal
+-  of the parse tree in any direction.
+-  attributes are represented as a linked
+-  list of AttVal nodes which hold the
+-  strings for attribute/value pairs.
++
++/**
++ *  Used for elements and text nodes.
++ *   - Element name is NULL for text nodes.
++ *   - start and end are offsets into lexbuf,
++ *     which contains the textual content of
++ *     all elements in the parse tree.
++ *   - parent and content allow traversal
++ *     of the parse tree in any direction.
++ *   - attributes are represented as a linked
++ *     list of AttVal nodes which hold the
++ *     strings for attribute/value pairs.
+ */
+ TY_PRIVATE Node* TY_(NewNode)( TidyAllocator* allocator, Lexer* lexer );
+ 
+ 
+-/* used to clone heading nodes when split by an <HR> */
++/**
++ *  Used to clone heading nodes when split by an `<HR>`
++ */
+ TY_PRIVATE Node* TY_(CloneNode)( TidyDocImpl* doc, Node *element );
+ 
+-/* free node's attributes */
++
++/**
++ *  Free node's attributes
++ */
+ TY_PRIVATE void TY_(FreeAttrs)( TidyDocImpl* doc, Node *node );
+ 
+-/* doesn't repair attribute list linkage */
++
++/**
++ *  Doesn't repair attribute list linkage
++ */
+ TY_PRIVATE void TY_(FreeAttribute)( TidyDocImpl* doc, AttVal *av );
+ 
+-/* detach attribute from node */
++
++/**
++ * Detach attribute from node
++ */
+ TY_PRIVATE void TY_(DetachAttribute)( Node *node, AttVal *attr );
+ 
+-/* detach attribute from node then free it
+-*/
++
++/**
++ *  Detach attribute from node then free it.
++ */
+ TY_PRIVATE void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
+ 
+-/*
+-  Free document nodes by iterating through peers and recursing
+-  through children. Set next to NULL before calling FreeNode()
+-  to avoid freeing peer nodes. Doesn't patch up prev/next links.
++
++/**
++ *  Free document nodes by iterating through peers and recursing
++ *  through children. Set `next` to `NULL` before calling `FreeNode()`
++ *  to avoid freeing peer nodes. Doesn't patch up prev/next links.
+  */
+ TY_PRIVATE void TY_(FreeNode)( TidyDocImpl* doc, Node *node );
+ 
++
+ TY_PRIVATE Node* TY_(TextToken)( Lexer *lexer );
+ 
+-/* used for creating preformatted text from Word2000 */
++
++/**
++ *  Used for creating preformatted text from Word2000.
++ */
+ TY_PRIVATE Node* TY_(NewLineNode)( Lexer *lexer );
+ 
+-/* used for adding a &nbsp; for Word2000 */
++
++/**
++ *  Used for adding a &nbsp; for Word2000.
++ */
+ TY_PRIVATE Node* TY_(NewLiteralTextNode)(Lexer *lexer, ctmbstr txt );
+ 
+-TY_PRIVATE void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
+-/* TY_PRIVATE void AddStringLiteralLen( Lexer* lexer, ctmbstr str, int len ); */
+ 
+-/* find element */
++TY_PRIVATE void TY_(AddStringLiteral)( Lexer* lexer, ctmbstr str );
+ TY_PRIVATE Node* TY_(FindDocType)( TidyDocImpl* doc );
+ TY_PRIVATE Node* TY_(FindHTML)( TidyDocImpl* doc );
+ TY_PRIVATE Node* TY_(FindHEAD)( TidyDocImpl* doc );
+@@ -468,10 +518,16 @@ TY_PRIVATE Node* TY_(FindTITLE)(TidyDocImpl* doc);
+ TY_PRIVATE Node* TY_(FindBody)( TidyDocImpl* doc );
+ TY_PRIVATE Node* TY_(FindXmlDecl)(TidyDocImpl* doc);
+ 
+-/* Returns containing block element, if any */
++
++/**
++ *  Returns containing block element, if any
++ */
+ TY_PRIVATE Node* TY_(FindContainer)( Node* node );
+ 
+-/* add meta element for Tidy */
++
++/**
++ *  Add meta element for Tidy.
++ */
+ TY_PRIVATE Bool TY_(AddGenerator)( TidyDocImpl* doc );
+ 
+ TY_PRIVATE uint TY_(ApparentVersion)( TidyDocImpl* doc );
+@@ -485,118 +541,209 @@ TY_PRIVATE Bool TY_(WarnMissingSIInEmittedDocType)( TidyDocImpl* doc );
+ TY_PRIVATE Bool TY_(SetXHTMLDocType)( TidyDocImpl* doc );
+ 
+ 
+-/* fixup doctype if missing */
++/**
++ *  Fixup doctype if missing.
++ */
+ TY_PRIVATE Bool TY_(FixDocType)( TidyDocImpl* doc );
+ 
+-/* ensure XML document starts with <?xml version="1.0"?> */
+-/* add encoding attribute if not using ASCII or UTF-8 output */
++
++/**
++ *  Ensure XML document starts with <?xml version="1.0"?>, and
++ *  add encoding attribute if not using ASCII or UTF-8 output.
++ */
+ TY_PRIVATE Bool TY_(FixXmlDecl)( TidyDocImpl* doc );
+ 
++
+ TY_PRIVATE Node* TY_(InferredTag)(TidyDocImpl* doc, TidyTagId id);
+ 
+ TY_PRIVATE void TY_(UngetToken)( TidyDocImpl* doc );
+ 
+-
+-/*
+-  modes for GetToken()
+-
+-  MixedContent   -- for elements which don't accept PCDATA
+-  Preformatted   -- white space preserved as is
+-  IgnoreMarkup   -- for CDATA elements such as script, style
+-*/
+-typedef enum
+-{
+-  IgnoreWhitespace,
+-  MixedContent,
+-  Preformatted,
+-  IgnoreMarkup,
+-  OtherNamespace,
+-  CdataContent
+-} GetTokenMode;
+-
+ TY_PRIVATE Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode );
+ 
+ TY_PRIVATE void TY_(InitMap)(void);
+ 
+ 
+-/* create a new attribute */
++/**
++ *  Create a new attribute.
++ */
+ TY_PRIVATE AttVal* TY_(NewAttribute)( TidyDocImpl* doc );
+ 
+-/* create a new attribute with given name and value */
++
++/**
++ *  Create a new attribute with given name and value.
++ */
+ TY_PRIVATE AttVal* TY_(NewAttributeEx)( TidyDocImpl* doc, ctmbstr name, ctmbstr value,
+                              int delim );
+ 
+-/* insert attribute at the end of attribute list of a node */
++
++/**
++ *  Insert attribute at the end of attribute list of a node.
++ */
+ TY_PRIVATE void TY_(InsertAttributeAtEnd)( Node *node, AttVal *av );
+ 
+-/* insert attribute at the start of attribute list of a node */
++/**
++ *  Insert attribute at the start of attribute list of a node.
++ */
+ TY_PRIVATE void TY_(InsertAttributeAtStart)( Node *node, AttVal *av );
+ 
+-/*************************************
+-  In-line Stack functions
+-*************************************/
+-
+-
+-/* duplicate attributes */
+-TY_PRIVATE AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
+ 
+-/*
+-  push a copy of an inline node onto stack
+-  but don't push if implicit or OBJECT or APPLET
+-  (implicit tags are ones generated from the istack)
++/** @}
++ *  @name Inline Stack Functions
++ *  @{
++ */
+ 
+-  One issue arises with pushing inlines when
+-  the tag is already pushed. For instance:
+ 
+-      <p><em>text
+-      <p><em>more text
++/**
++ *  Duplicate attributes.
++ */
++TY_PRIVATE AttVal* TY_(DupAttrs)( TidyDocImpl* doc, AttVal* attrs );
+ 
+-  Shouldn't be mapped to
+ 
+-      <p><em>text</em></p>
+-      <p><em><em>more text</em></em>
+-*/
++/**
++ *  Push a copy of an inline node onto stack, but don't push if
++ *  implicit or OBJECT or APPLET (implicit tags are ones generated
++ *  from the istack).
++ *
++ *  One issue arises with pushing inlines when the tag is already pushed.
++ *  For instance:
++ *    ~~~
++ *    <p><em>text
++ *    <p><em>more text
++ *    ~~~
++ *  Shouldn't be mapped to
++ *    ~~~
++ *    <p><em>text</em></p>
++ *    <p><em><em>more text</em></em>
++ *    ~~~
++ */
+ TY_PRIVATE void TY_(PushInline)( TidyDocImpl* doc, Node* node );
+ 
+-/* pop inline stack */
++
++/**
++ * Pop inline stack.
++ */
+ TY_PRIVATE void TY_(PopInline)( TidyDocImpl* doc, Node* node );
+ 
++
+ TY_PRIVATE Bool TY_(IsPushed)( TidyDocImpl* doc, Node* node );
+ TY_PRIVATE Bool TY_(IsPushedLast)( TidyDocImpl* doc, Node *element, Node *node );
+ 
+-/*
+-  This has the effect of inserting "missing" inline
+-  elements around the contents of blocklevel elements
+-  such as P, TD, TH, DIV, PRE etc. This procedure is
+-  called at the start of ParseBlock. when the inline
+-  stack is not empty, as will be the case in:
+ 
+-    <i><h1>italic heading</h1></i>
++/**
++ *  This has the effect of inserting "missing" inline elements around the
++ *  contents of blocklevel elements such as P, TD, TH, DIV, PRE etc. This
++ *  procedure is called at the start of `ParseBlock`, when the inline
++ *  stack is not empty, as will be the case in:
++ *    ~~~
++ *    <i><h1>italic heading</h1></i>
++ *    ~~~
++ *  which is then treated as equivalent to
++ *    ~~~
++ *    <h1><i>italic heading</i></h1>
++ *    ~~~
++ *  This is implemented by setting the lexer into a mode where it gets
++ *  tokens from the inline stack rather than from the input stream.
++ */
++TY_PRIVATE int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
+ 
+-  which is then treated as equivalent to
+ 
+-    <h1><i>italic heading</i></h1>
++/**
++ *  Defer duplicates when entering a table or other
++ *  element where the inlines shouldn't be duplicated.
++ */
++TY_PRIVATE void TY_(DeferDup)( TidyDocImpl* doc );
+ 
+-  This is implemented by setting the lexer into a mode
+-  where it gets tokens from the inline stack rather than
+-  from the input stream.
+-*/
+-TY_PRIVATE int TY_(InlineDup)( TidyDocImpl* doc, Node *node );
+ 
+-/*
+- defer duplicates when entering a table or other
+- element where the inlines shouldn't be duplicated
+-*/
+-TY_PRIVATE void TY_(DeferDup)( TidyDocImpl* doc );
+ TY_PRIVATE Node* TY_(InsertedToken)( TidyDocImpl* doc );
+ 
+-/* stack manipulation for inline elements */
++/**
++ *  Stack manipulation for inline elements
++ */
+ TY_PRIVATE Bool TY_(SwitchInline)( TidyDocImpl* doc, Node* element, Node* node );
++
++
+ TY_PRIVATE Bool TY_(InlineDup1)( TidyDocImpl* doc, Node* node, Node* element );
+ 
++
++/** @}
++ *  @name Generic stack of nodes.
++ *  @{
++ */
++
++
++/**
++ * This typedef represents a stack of addresses to nodes. Tidy uses these to
++ * try to limit recursion by pushing nodes to a stack when possible instead
++ * of recursing.
++ */
++typedef struct _Stack {
++    int top;                        /**< Index of the top element; -1 when the stack is empty. */
++    unsigned capacity;              /**< Number of node pointers firstNode can hold. Can be expanded. */
++    Node **firstNode;               /**< A pointer to the first pointer to a Node in an array of node addresses. */
++    TidyAllocator* allocator;       /**< Tidy's allocator, used at instantiation and expanding. */
++} Stack;
++ 
++
++/**
++ * Create a new stack with a given starting capacity. If memory allocation
++ * fails, then the allocator will panic the program automatically.
++ */
++TY_PRIVATE Stack* TY_(newStack)(TidyDocImpl *doc, uint capacity);
++ 
++
++/**
++ *  Increase the stack size. This will be called automatically when the
++ *  current stack is full. If memory allocation fails, then the allocator
++ *  will panic the program automatically.
++ */
++TY_PRIVATE void TY_(growStack)(Stack *stack);
++
++
++/**
++ * Stack is full when top is equal to the last index.
++ */
++TY_PRIVATE Bool TY_(stackFull)(Stack *stack);
++
++
++/**
++ * Stack is empty when top is equal to -1
++ */
++TY_PRIVATE Bool TY_(stackEmpty)(Stack *stack);
++ 
++
++/**
++ * Push an item to the stack.
++ */
++TY_PRIVATE void TY_(push)(Stack *stack, Node *node);
++
++
++/**
++ * Pop an item from the stack.
++ */
++TY_PRIVATE Node* TY_(pop)(Stack *stack);
++
++
++/**
++ * Peek at the stack.
++ */
++TY_PRIVATE Node* TY_(peek)(Stack *stack);
++
++/**
++ *  Frees the stack when done.
++ */
++TY_PRIVATE void TY_(freeStack)(Stack *stack);
++
++
++/** @}
++ */
++
++
+ #ifdef __cplusplus
+ }
+ #endif
+ 
+ 
++/** @} end lexer_h group */
++/** @} end internal_api group */
++
+ #endif /* __LEXER_H__ */
diff --git a/SPECS/tidy/tidy.spec b/SPECS/tidy/tidy.spec
index f08a787a47..459ebaef5c 100644
--- a/SPECS/tidy/tidy.spec
+++ b/SPECS/tidy/tidy.spec
@@ -5,12 +5,13 @@
 Summary:        Utility to clean up and pretty print HTML/XHTML/XML
 Name:           tidy
 Version:        5.8.0
-Release:        5%{?dist}
+Release:        6%{?dist}
 License:        W3C
 Vendor:         Microsoft Corporation
 Distribution:   Mariner
 URL:            https://www.html-tidy.org/
 Source0:        https://github.com/htacg/%{upname}/archive/%{version}.tar.gz#/%{upname}-%{version}.tar.gz
+Patch0:         CVE-2021-33391.patch
 BuildRequires:  cmake
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
@@ -92,6 +93,9 @@ rm -fv %{buildroot}%{_libdir}/libtidy.a
 %{_libdir}/pkgconfig/tidy.pc
 
 %changelog
+* Mon May 01 2023 Sean Dougherty <sdougherty@microsoft.com> - 5.8.0-6
+- Backported patch to fix CVE-2021-33391
+
 * Tue Oct 18 2022 Osama Esmail <osamaesmail@microsoft.com> - 5.8.0-5
 - Upgraded from 5.7.28 to 5.8.0
 - Changed libtidys.a to libtidy.a