From c1ce4a8d423012ecc6701b4a2bab057adc0c86cd Mon Sep 17 00:00:00 2001
From: "jgellman%netscape.com" <jgellman%netscape.com>
Date: Thu, 20 Aug 1998 21:20:50 +0000
Subject: [PATCH] 2nd try; new parser from james clark

---
 modules/xml/expat/xmlparse/hashtable.c |   19 +-
 modules/xml/expat/xmlparse/hashtable.h |   10 +-
 modules/xml/expat/xmlparse/xmlparse.c  | 1578 +++++++++++++++++++-----
 modules/xml/expat/xmlparse/xmlparse.h  |  295 ++++-
 modules/xml/expat/xmltok/xmlrole.c     |    2 +-
 modules/xml/expat/xmltok/xmlrole.h     |    1 +
 modules/xml/expat/xmltok/xmltok.c      |  606 +++++++--
 modules/xml/expat/xmltok/xmltok.h      |   74 +-
 modules/xml/expat/xmltok/xmltok_impl.c |  188 ++-
 modules/xml/macbuild/XML.mcp.exp       |    7 +-
 10 files changed, 2269 insertions(+), 511 deletions(-)

diff --git a/modules/xml/expat/xmlparse/hashtable.c b/modules/xml/expat/xmlparse/hashtable.c
index e029f9f44227..2876975bd2bb 100644
--- a/modules/xml/expat/xmlparse/hashtable.c
+++ b/modules/xml/expat/xmlparse/hashtable.c
@@ -1,7 +1,7 @@
 /*
 The contents of this file are subject to the Mozilla Public License
 Version 1.0 (the "License"); you may not use this file except in
-compliance with the License. You may obtain a copy of the License at
+compliance with the License. You may obtain a copy of the License at
 http://www.mozilla.org/MPL/
 
 Software distributed under the License is distributed on an "AS IS"
@@ -18,15 +18,22 @@ James Clark. All Rights Reserved.
 Contributor(s):
 */
 
-#include "xmldef.h"
-#include "hashtable.h"
 #include <stdlib.h>
 #include <string.h>
 
+#include "xmldef.h"
+#include "hashtable.h"
+
+#ifdef XML_UNICODE
+#define keycmp wcscmp /* NOTE(review): wcscmp is declared in <wchar.h>, which is not among the includes visible here; confirm xmldef.h supplies it */
+#else
+#define keycmp strcmp /* narrow-character keys keep comparing with strcmp */
+#endif
+
 #define INIT_SIZE 64
 
 static
-unsigned long hash(const char *s)
+unsigned long hash(KEY s)
 {
   unsigned long h = 0;
   while (*s)
@@ -34,7 +41,7 @@ unsigned long hash(const char *s)
   return h;
 }
 
-NAMED *lookup(HASH_TABLE *table, const char *name, size_t createSize)
+NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize)
 {
   size_t i;
   if (table->size == 0) {
@@ -52,7 +59,7 @@ NAMED *lookup(HASH_TABLE *table, const char *name, size_t createSize)
     for (i = h & (table->size - 1);
          table->v[i];
          i == 0 ? i = table->size - 1 : --i) {
-      if (strcmp(name, table->v[i]->name) == 0)
+      if (keycmp(name, table->v[i]->name) == 0)
 	return table->v[i];
     }
     if (!createSize)
diff --git a/modules/xml/expat/xmlparse/hashtable.h b/modules/xml/expat/xmlparse/hashtable.h
index 19ec9902bdfd..d10e591c7ff8 100644
--- a/modules/xml/expat/xmlparse/hashtable.h
+++ b/modules/xml/expat/xmlparse/hashtable.h
@@ -21,8 +21,14 @@ Contributor(s):
 
 #include <stddef.h>
 
+#ifdef XML_UNICODE
+typedef const wchar_t *KEY;
+#else
+typedef const char *KEY;
+#endif
+
 typedef struct {
-  const char *name;
+  KEY name;
 } NAMED;
 
 typedef struct {
@@ -32,7 +38,7 @@ typedef struct {
   size_t usedLim;
 } HASH_TABLE;
 
-NAMED *lookup(HASH_TABLE *table, const char *name, size_t createSize);
+NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize);
 void hashTableInit(HASH_TABLE *);
 void hashTableDestroy(HASH_TABLE *);
 
diff --git a/modules/xml/expat/xmlparse/xmlparse.c b/modules/xml/expat/xmlparse/xmlparse.c
index 3c733bc0eb0e..4709579a7f63 100644
--- a/modules/xml/expat/xmlparse/xmlparse.c
+++ b/modules/xml/expat/xmlparse/xmlparse.c
@@ -18,16 +18,43 @@ James Clark. All Rights Reserved.
 Contributor(s):
 */
 
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
 #include "xmldef.h"
+
+#ifdef XML_UNICODE /* map the generic Xml* names used below onto the UTF-16 variants */
+#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
+#define XmlConvert XmlUtf16Convert
+#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
+#define XmlEncode XmlUtf16Encode
+#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) /* true when enc is not UTF-16 or s sits at an odd address. NOTE(review): s is not parenthesized and the pointer is cast to unsigned long; confirm this is safe on all targets */
+typedef unsigned short ICHAR;
+#else /* map the generic Xml* names used below onto the UTF-8 variants */
+#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
+#define XmlConvert XmlUtf8Convert
+#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
+#define XmlEncode XmlUtf8Encode
+#define MUST_CONVERT(enc, s) (!(enc)->isUtf8) /* s is unused in this variant */
+typedef char ICHAR;
+#endif
+
+#ifdef XML_UNICODE_WCHAR_T
+#define XML_T(x) L ## x
+#else
+#define XML_T(x) x
+#endif
+
+/* Round up n to be a multiple of sz, where sz is a power of 2. */
+#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
+
 #include "xmlparse.h"
 #include "xmltok.h"
 #include "xmlrole.h"
 #include "hashtable.h"
 
-#include <stdlib.h>
-#include <string.h>
-
-#define INIT_TAG_BUF_SIZE 32
+#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
 #define INIT_DATA_BUF_SIZE 1024
 #define INIT_ATTS_SIZE 16
 #define INIT_BLOCK_SIZE 1024
@@ -37,51 +64,51 @@ typedef struct tag {
   struct tag *parent;
   const char *rawName;
   int rawNameLength;
-  const char *name;
+  const XML_Char *name;
   char *buf;
   char *bufEnd;
 } TAG;
 
 typedef struct {
-  const char *name;
-  const char *textPtr;
+  const XML_Char *name;
+  const XML_Char *textPtr;
   int textLen;
-  const char *systemId;
-  const char *publicId;
-  const char *notation;
+  const XML_Char *systemId;
+  const XML_Char *base;
+  const XML_Char *publicId;
+  const XML_Char *notation;
   char open;
-  char magic;
 } ENTITY;
 
 typedef struct block {
   struct block *next;
   int size;
-  char s[1];
+  XML_Char s[1];
 } BLOCK;
 
 typedef struct {
   BLOCK *blocks;
   BLOCK *freeBlocks;
-  const char *end;
-  char *ptr;
-  char *start;
+  const XML_Char *end;
+  XML_Char *ptr;
+  XML_Char *start;
 } STRING_POOL;
 
-/* The byte before the name is a scratch byte used to determine whether
+/* The XML_Char before the name is used to determine whether
 an attribute has been specified. */
 typedef struct {
-  char *name;
+  XML_Char *name;
   char maybeTokenized;
 } ATTRIBUTE_ID;
 
 typedef struct {
   const ATTRIBUTE_ID *id;
   char isCdata;
-  const char *value;
+  const XML_Char *value;
 } DEFAULT_ATTRIBUTE;
 
 typedef struct {
-  const char *name;
+  const XML_Char *name;
   int nDefaultAtts;
   int allocDefaultAtts;
   DEFAULT_ATTRIBUTE *defaultAtts;
@@ -94,6 +121,7 @@ typedef struct {
   STRING_POOL pool;
   int complete;
   int standalone;
+  const XML_Char *base;
 } DTD;
 
 typedef enum XML_Error Processor(XML_Parser parser,
@@ -102,16 +130,30 @@ typedef enum XML_Error Processor(XML_Parser parser,
 				 const char **endPtr);
 
 static Processor prologProcessor;
+static Processor prologInitProcessor;
 static Processor contentProcessor;
+static Processor cdataSectionProcessor;
 static Processor epilogProcessor;
 static Processor errorProcessor;
+static Processor externalEntityInitProcessor;
+static Processor externalEntityInitProcessor2;
+static Processor externalEntityInitProcessor3;
+static Processor externalEntityContentProcessor;
 
+static enum XML_Error
+handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
+static enum XML_Error
+processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
+static enum XML_Error
+initializeEncoding(XML_Parser parser);
 static enum XML_Error
 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
 	  const char *start, const char *end, const char **endPtr);
-static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *tagName, const char *s);
+static enum XML_Error
+doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
+static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const XML_Char *tagName, const char *s);
 static int
-defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const char *dfltValue);
+defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
 static enum XML_Error
 storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
 		    STRING_POOL *);
@@ -124,31 +166,42 @@ static enum XML_Error
 storeEntityValue(XML_Parser parser, const char *start, const char *end);
 static int
 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
+static void
+reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
 
+static const XML_Char *getOpenEntityNames(XML_Parser parser);
+static int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames);
+static void normalizePublicId(XML_Char *s);
 static int dtdInit(DTD *);
 static void dtdDestroy(DTD *);
+static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
 static void poolInit(STRING_POOL *);
 static void poolClear(STRING_POOL *);
 static void poolDestroy(STRING_POOL *);
-static char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
-			const char *ptr, const char *end);
-static char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
-			     const char *ptr, const char *end);
+static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
+			    const char *ptr, const char *end);
+static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
+				  const char *ptr, const char *end);
 static int poolGrow(STRING_POOL *pool);
+static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
+static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
 
 #define poolStart(pool) ((pool)->start)
 #define poolEnd(pool) ((pool)->ptr)
 #define poolLength(pool) ((pool)->ptr - (pool)->start)
 #define poolChop(pool) ((void)--(pool->ptr))
-#define poolLastByte(pool) (((pool)->ptr)[-1])
+#define poolLastChar(pool) (((pool)->ptr)[-1])
 #define poolDiscard(pool) ((pool)->ptr = (pool)->start)
 #define poolFinish(pool) ((pool)->start = (pool)->ptr)
-#define poolAppendByte(pool, c) \
+#define poolAppendChar(pool, c) \
   (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
    ? 0 \
    : ((*((pool)->ptr)++ = c), 1))
 
 typedef struct {
+  /* The first member must be userData so that the XML_GetUserData macro works. */
+  void *userData;
+  void *handlerArg;
   char *buffer;
   /* first character to be parsed */
   const char *bufferPtr;
@@ -156,22 +209,36 @@ typedef struct {
   char *bufferEnd;
   /* allocated end of buffer */
   const char *bufferLim;
-  long bufferEndByteIndex;
-  char *dataBuf;
-  char *dataBufEnd;
-  void *userData;
+  long parseEndByteIndex;
+  const char *parseEndPtr;
+  XML_Char *dataBuf;
+  XML_Char *dataBufEnd;
   XML_StartElementHandler startElementHandler;
   XML_EndElementHandler endElementHandler;
   XML_CharacterDataHandler characterDataHandler;
   XML_ProcessingInstructionHandler processingInstructionHandler;
+  XML_DefaultHandler defaultHandler;
+  XML_UnparsedEntityDeclHandler unparsedEntityDeclHandler;
+  XML_NotationDeclHandler notationDeclHandler;
+  XML_ExternalEntityRefHandler externalEntityRefHandler;
+  XML_UnknownEncodingHandler unknownEncodingHandler;
   const ENCODING *encoding;
   INIT_ENCODING initEncoding;
+  const XML_Char *protocolEncodingName;
+  void *unknownEncodingMem;
+  void *unknownEncodingData;
+  void *unknownEncodingHandlerData;
+  void (*unknownEncodingRelease)(void *);
   PROLOG_STATE prologState;
   Processor *processor;
   enum XML_Error errorCode;
-  const char *errorPtr;
+  const char *eventPtr;
+  const char *eventEndPtr;
+  const char *positionPtr;
   int tagLevel;
   ENTITY *declEntity;
+  const XML_Char *declNotationName;
+  const XML_Char *declNotationPublicId;
   ELEMENT_TYPE *declElementType;
   ATTRIBUTE_ID *declAttributeId;
   char declAttributeIsCdata;
@@ -181,7 +248,6 @@ typedef struct {
   int attsSize;
   ATTRIBUTE *atts;
   POSITION position;
-  long errorByteIndex;
   STRING_POOL tempPool;
   STRING_POOL temp2Pool;
   char *groupConnector;
@@ -190,28 +256,44 @@ typedef struct {
 } Parser;
 
 #define userData (((Parser *)parser)->userData)
+#define handlerArg (((Parser *)parser)->handlerArg)
 #define startElementHandler (((Parser *)parser)->startElementHandler)
 #define endElementHandler (((Parser *)parser)->endElementHandler)
 #define characterDataHandler (((Parser *)parser)->characterDataHandler)
 #define processingInstructionHandler (((Parser *)parser)->processingInstructionHandler)
+#define defaultHandler (((Parser *)parser)->defaultHandler)
+#define unparsedEntityDeclHandler (((Parser *)parser)->unparsedEntityDeclHandler)
+#define notationDeclHandler (((Parser *)parser)->notationDeclHandler)
+#define externalEntityRefHandler (((Parser *)parser)->externalEntityRefHandler)
+#define unknownEncodingHandler (((Parser *)parser)->unknownEncodingHandler)
 #define encoding (((Parser *)parser)->encoding)
 #define initEncoding (((Parser *)parser)->initEncoding)
+#define unknownEncodingMem (((Parser *)parser)->unknownEncodingMem)
+#define unknownEncodingData (((Parser *)parser)->unknownEncodingData)
+#define unknownEncodingHandlerData \
+  (((Parser *)parser)->unknownEncodingHandlerData)
+#define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease)
+#define protocolEncodingName (((Parser *)parser)->protocolEncodingName)
 #define prologState (((Parser *)parser)->prologState)
 #define processor (((Parser *)parser)->processor)
 #define errorCode (((Parser *)parser)->errorCode)
-#define errorPtr (((Parser *)parser)->errorPtr)
-#define errorByteIndex (((Parser *)parser)->errorByteIndex)
+#define eventPtr (((Parser *)parser)->eventPtr)
+#define eventEndPtr (((Parser *)parser)->eventEndPtr)
+#define positionPtr (((Parser *)parser)->positionPtr)
 #define position (((Parser *)parser)->position)
 #define tagLevel (((Parser *)parser)->tagLevel)
 #define buffer (((Parser *)parser)->buffer)
 #define bufferPtr (((Parser *)parser)->bufferPtr)
 #define bufferEnd (((Parser *)parser)->bufferEnd)
-#define bufferEndByteIndex (((Parser *)parser)->bufferEndByteIndex)
+#define parseEndByteIndex (((Parser *)parser)->parseEndByteIndex)
+#define parseEndPtr (((Parser *)parser)->parseEndPtr)
 #define bufferLim (((Parser *)parser)->bufferLim)
 #define dataBuf (((Parser *)parser)->dataBuf)
 #define dataBufEnd (((Parser *)parser)->dataBufEnd)
 #define dtd (((Parser *)parser)->dtd)
 #define declEntity (((Parser *)parser)->declEntity)
+#define declNotationName (((Parser *)parser)->declNotationName)
+#define declNotationPublicId (((Parser *)parser)->declNotationPublicId)
 #define declElementType (((Parser *)parser)->declElementType)
 #define declAttributeId (((Parser *)parser)->declAttributeId)
 #define declAttributeIsCdata (((Parser *)parser)->declAttributeIsCdata)
@@ -225,50 +307,102 @@ typedef struct {
 #define groupSize (((Parser *)parser)->groupSize)
 #define hadExternalDoctype (((Parser *)parser)->hadExternalDoctype)
 
-XML_Parser XML_ParserCreate(const char *encodingName)
+XML_Parser XML_ParserCreate(const XML_Char *encodingName) /* Allocates a parser with every field reset; encodingName may be 0. Returns 0 if any allocation fails. */
 {
   XML_Parser parser = malloc(sizeof(Parser));
   if (!parser)
     return parser;
-  processor = prologProcessor;
+  processor = prologInitProcessor; /* parsing now starts in prologInitProcessor instead of prologProcessor */
   XmlPrologStateInit(&prologState);
   userData = 0;
+  handlerArg = 0; /* equal to userData, so XML_SetUserData keeps the two in step */
   startElementHandler = 0;
   endElementHandler = 0;
   characterDataHandler = 0;
   processingInstructionHandler = 0;
+  defaultHandler = 0;
+  unparsedEntityDeclHandler = 0;
+  notationDeclHandler = 0;
+  externalEntityRefHandler = 0;
+  unknownEncodingHandler = 0;
   buffer = 0;
   bufferPtr = 0;
   bufferEnd = 0;
-  bufferEndByteIndex = 0;
+  parseEndByteIndex = 0;
+  parseEndPtr = 0;
   bufferLim = 0;
   declElementType = 0;
   declAttributeId = 0;
   declEntity = 0;
+  declNotationName = 0;
+  declNotationPublicId = 0;
   memset(&position, 0, sizeof(POSITION));
   errorCode = XML_ERROR_NONE;
-  errorByteIndex = 0;
-  errorPtr = 0;
+  eventPtr = 0;
+  eventEndPtr = 0;
+  positionPtr = 0;
   tagLevel = 0;
   tagStack = 0;
   freeTagList = 0;
   attsSize = INIT_ATTS_SIZE;
   atts = malloc(attsSize * sizeof(ATTRIBUTE));
-  dataBuf = malloc(INIT_DATA_BUF_SIZE);
+  dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); /* sized in XML_Char units; dataBufEnd below uses XML_Char pointer arithmetic */
   groupSize = 0;
   groupConnector = 0;
   hadExternalDoctype = 0;
+  unknownEncodingMem = 0;
+  unknownEncodingRelease = 0;
+  unknownEncodingData = 0;
+  unknownEncodingHandlerData = 0;
   poolInit(&tempPool);
   poolInit(&temp2Pool);
-  if (!dtdInit(&dtd) || !atts || !dataBuf) {
+  protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0; /* the parser's own copy of the name, held in tempPool */
+  if (!dtdInit(&dtd) || !atts || !dataBuf
+      || (encodingName && !protocolEncodingName)) { /* a failed name copy is handled like the other allocation failures */
     XML_ParserFree(parser);
     return 0;
   }
   dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
-  if (!XmlInitEncoding(&initEncoding, &encoding, encodingName)) {
-    errorCode = XML_ERROR_UNKNOWN_ENCODING;
-    processor = errorProcessor;
+  XmlInitEncoding(&initEncoding, &encoding, 0); /* encodingName is no longer passed here; it is only saved in protocolEncodingName above */
+  return parser;
+}
+
+XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, /* Creates a parser for an external entity that reuses oldParser's handlers and user data and gets a copy of its DTD; returns 0 on failure. */
+					  const XML_Char *openEntityNames,
+					  const XML_Char *encodingName)
+{
+  XML_Parser parser = oldParser; /* the field macros expand to ((Parser *)parser)->..., so the reads below come from oldParser */
+  DTD *oldDtd = &dtd;
+  XML_StartElementHandler oldStartElementHandler = startElementHandler;
+  XML_EndElementHandler oldEndElementHandler = endElementHandler;
+  XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
+  XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
+  XML_DefaultHandler oldDefaultHandler = defaultHandler;
+  XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
+  XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
+  void *oldUserData = userData;
+  void *oldHandlerArg = handlerArg;
+ 
+  parser = XML_ParserCreate(encodingName); /* from here on the field macros address the new parser */
+  if (!parser)
+    return 0;
+  startElementHandler = oldStartElementHandler;
+  endElementHandler = oldEndElementHandler;
+  characterDataHandler = oldCharacterDataHandler;
+  processingInstructionHandler = oldProcessingInstructionHandler;
+  defaultHandler = oldDefaultHandler; /* unparsedEntityDeclHandler and notationDeclHandler are not copied and stay 0 */
+  externalEntityRefHandler = oldExternalEntityRefHandler;
+  unknownEncodingHandler = oldUnknownEncodingHandler; /* NOTE(review): unknownEncodingHandlerData is not carried over and stays 0 from XML_ParserCreate; confirm intended */
+  userData = oldUserData;
+  if (oldUserData == oldHandlerArg) /* old parser passed userData to handlers: keep doing so */
+    handlerArg = userData;
+  else /* otherwise handlers receive the new parser itself */
+    handlerArg = parser;
+  if (!dtdCopy(&dtd, oldDtd) || !setOpenEntityNames(parser, openEntityNames)) {
+    XML_ParserFree(parser);
+    return 0;
   }
+  processor = externalEntityInitProcessor; /* replaces the prologInitProcessor installed by XML_ParserCreate */
   return parser;
 }
 
@@ -294,12 +428,41 @@ void XML_ParserFree(XML_Parser parser)
   free(groupConnector);
   free(buffer);
   free(dataBuf);
+  free(unknownEncodingMem);
+  if (unknownEncodingRelease)
+    unknownEncodingRelease(unknownEncodingData);
   free(parser);
 }
 
+void XML_UseParserAsHandlerArg(XML_Parser parser) /* Makes handlerArg, the first argument passed to handlers, the parser itself. */
+{
+  handlerArg = parser;
+}
+
 void XML_SetUserData(XML_Parser parser, void *p)
 {
-  userData = p;
+  if (handlerArg == userData) /* handlers currently receive userData: keep handlerArg in step with it */
+    handlerArg = userData = p;
+  else /* handlerArg points elsewhere (e.g. after XML_UseParserAsHandlerArg): leave it alone */
+    userData = p;
+}
+
+int XML_SetBase(XML_Parser parser, const XML_Char *p) /* Sets dtd.base to a pooled copy of p, or to 0 when p is 0; returns 1 on success, 0 if the copy fails. */
+{
+  if (p) {
+    p = poolCopyString(&dtd.pool, p); /* p is rebound to the copy made in the DTD's string pool */
+    if (!p)
+      return 0; /* dtd.base keeps its previous value on failure */
+    dtd.base = p;
+  }
+  else
+    dtd.base = 0;
+  return 1;
+}
+
+const XML_Char *XML_GetBase(XML_Parser parser) /* Returns the current dtd.base (0 after XML_SetBase(parser, 0)). */
+{
+  return dtd.base;
 }
 
 void XML_SetElementHandler(XML_Parser parser,
@@ -322,44 +485,75 @@ void XML_SetProcessingInstructionHandler(XML_Parser parser,
   processingInstructionHandler = handler;
 }
 
+void XML_SetDefaultHandler(XML_Parser parser, /* Installs handler as the parser's defaultHandler; 0 removes it. */
+			   XML_DefaultHandler handler)
+{
+  defaultHandler = handler;
+}
+
+void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, /* Installs handler as the parser's unparsedEntityDeclHandler. */
+				      XML_UnparsedEntityDeclHandler handler)
+{
+  unparsedEntityDeclHandler = handler;
+}
+
+void XML_SetNotationDeclHandler(XML_Parser parser, /* Installs handler as the parser's notationDeclHandler. */
+				XML_NotationDeclHandler handler)
+{
+  notationDeclHandler = handler;
+}
+
+void XML_SetExternalEntityRefHandler(XML_Parser parser, /* Installs handler as the parser's externalEntityRefHandler. */
+				     XML_ExternalEntityRefHandler handler)
+{
+  externalEntityRefHandler = handler;
+}
+
+void XML_SetUnknownEncodingHandler(XML_Parser parser, /* Installs the unknown-encoding handler and stores data alongside it. */
+				   XML_UnknownEncodingHandler handler,
+				   void *data)
+{
+  unknownEncodingHandler = handler;
+  unknownEncodingHandlerData = data; /* kept separately from userData */
+}
+
 int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
 {
-  bufferEndByteIndex += len;
   if (len == 0) {
     if (!isFinal)
       return 1;
-    errorCode = processor(parser, bufferPtr, bufferEnd, 0);
-    return errorCode == XML_ERROR_NONE;
+    errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
+    if (errorCode == XML_ERROR_NONE)
+      return 1;
+    eventEndPtr = eventPtr;
+    return 0;
   }
   else if (bufferPtr == bufferEnd) {
     const char *end;
     int nLeftOver;
+    parseEndByteIndex += len;
+    positionPtr = s;
     if (isFinal) {
-      errorCode = processor(parser, s, s + len, 0);
+      errorCode = processor(parser, s, parseEndPtr = s + len, 0);
       if (errorCode == XML_ERROR_NONE)
 	return 1;
-      if (errorPtr) {
-	errorByteIndex = bufferEndByteIndex - (s + len - errorPtr);
-	XmlUpdatePosition(encoding, s, errorPtr, &position);
-      }
+      eventEndPtr = eventPtr;
       return 0;
     }
-    errorCode = processor(parser, s, s + len, &end);
+    errorCode = processor(parser, s, parseEndPtr = s + len, &end);
     if (errorCode != XML_ERROR_NONE) {
-      if (errorPtr) {
-	errorByteIndex = bufferEndByteIndex - (s + len - errorPtr);
-	XmlUpdatePosition(encoding, s, errorPtr, &position);
-      }
+      eventEndPtr = eventPtr;
       return 0;
     }
-    XmlUpdatePosition(encoding, s, end, &position);
+    XmlUpdatePosition(encoding, positionPtr, end, &position);
     nLeftOver = s + len - end;
     if (nLeftOver) {
       if (buffer == 0 || nLeftOver > bufferLim - buffer) {
 	/* FIXME avoid integer overflow */
-	buffer = realloc(buffer, len * 2);
+	buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
 	if (!buffer) {
 	  errorCode = XML_ERROR_NO_MEMORY;
+	  eventPtr = eventEndPtr = 0;
 	  return 0;
 	}
 	bufferLim = buffer + len * 2;
@@ -379,19 +573,18 @@ int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
 int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
 {
   const char *start = bufferPtr;
+  positionPtr = start; /* line/column bookkeeping resumes from the first unparsed byte */
   bufferEnd += len;
-  errorCode = processor(parser, bufferPtr, bufferEnd,
+  parseEndByteIndex += len; /* running byte count at parseEndPtr, read back by XML_GetCurrentByteIndex */
+  errorCode = processor(parser, start, parseEndPtr = bufferEnd, /* parseEndPtr is recorded before the processor runs */
 			isFinal ? (const char **)0 : &bufferPtr);
   if (errorCode == XML_ERROR_NONE) {
     if (!isFinal)
-      XmlUpdatePosition(encoding, start, bufferPtr, &position);
+      XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); /* positionPtr rather than start: XML_GetCurrentLineNumber/ColumnNumber move it forward when called */
     return 1;
   }
   else {
-    if (errorPtr) {
-      errorByteIndex = bufferEndByteIndex - (bufferEnd - errorPtr);
-      XmlUpdatePosition(encoding, start, errorPtr, &position);
-    }
+    eventEndPtr = eventPtr; /* on error the current event is collapsed to an empty range at eventPtr */
     return 0;
   }
 }
@@ -431,49 +624,67 @@ void *XML_GetBuffer(XML_Parser parser, int len)
   return bufferEnd;
 }
 
-int XML_GetErrorCode(XML_Parser parser)
+enum XML_Error XML_GetErrorCode(XML_Parser parser) /* Returns the parser's stored errorCode. */
 {
   return errorCode;
 }
 
-int XML_GetErrorLineNumber(XML_Parser parser)
+long XML_GetCurrentByteIndex(XML_Parser parser) /* Byte index of eventPtr, or -1 when eventPtr is not set. */
 {
+  if (eventPtr)
+    return parseEndByteIndex - (parseEndPtr - eventPtr); /* count back from the end of the data handed to the processor */
+  return -1;
+}
+
+int XML_GetCurrentLineNumber(XML_Parser parser) /* Returns position.lineNumber + 1, first bringing position up to eventPtr when one is set. */
+{
+  if (eventPtr) {
+    XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
+    positionPtr = eventPtr; /* remember how far position has been advanced so the next update starts here */
+  }
   return position.lineNumber + 1;
 }
 
-int XML_GetErrorColumnNumber(XML_Parser parser)
+int XML_GetCurrentColumnNumber(XML_Parser parser) /* Returns position.columnNumber, first bringing position up to eventPtr when one is set. */
 {
+  if (eventPtr) {
+    XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
+    positionPtr = eventPtr; /* remember how far position has been advanced so the next update starts here */
+  }
   return position.columnNumber;
 }
 
-long XML_GetErrorByteIndex(XML_Parser parser)
+void XML_DefaultCurrent(XML_Parser parser) /* Passes the current event range to reportDefault; does nothing when no defaultHandler is set. */
 {
-  return errorByteIndex;
+  if (defaultHandler)
+    reportDefault(parser, encoding, eventPtr, eventEndPtr); /* range is [eventPtr, eventEndPtr) as last recorded by the parser */
 }
 
-const char *XML_ErrorString(int code)
+const XML_LChar *XML_ErrorString(int code)
 {
-  static const char *message[] = {
+  static const XML_LChar *message[] = {
     0,
-    "out of memory",
-    "syntax error",
-    "no element found",
-    "not well-formed",
-    "unclosed token",
-    "unclosed token",
-    "mismatched tag",
-    "duplicate attribute",
-    "junk after document element",
-    "parameter entity reference not allowed within declaration in internal subset",
-    "undefined entity",
-    "recursive entity reference",
-    "asynchronous entity",
-    "reference to invalid character number",
-    "reference to binary entity",
-    "reference to external entity in attribute",
-    "xml processing instruction not at start of external entity",
-    "unknown encoding",
-    "encoding specified in XML declaration is incorrect"
+    XML_T("out of memory"),
+    XML_T("syntax error"),
+    XML_T("no element found"),
+    XML_T("not well-formed"),
+    XML_T("unclosed token"),
+    XML_T("unclosed token"),
+    XML_T("mismatched tag"),
+    XML_T("duplicate attribute"),
+    XML_T("junk after document element"),
+    XML_T("illegal parameter entity reference"),
+    XML_T("undefined entity"),
+    XML_T("recursive entity reference"),
+    XML_T("asynchronous entity"),
+    XML_T("reference to invalid character number"),
+    XML_T("reference to binary entity"),
+    XML_T("reference to external entity in attribute"),
+    XML_T("xml processing instruction not at start of external entity"),
+    XML_T("unknown encoding"),
+    XML_T("encoding specified in XML declaration is incorrect"),
+    XML_T("unclosed CDATA section"),
+    XML_T("error in processing external entity reference")
   };
   if (code > 0 && code < sizeof(message)/sizeof(message[0]))
     return message[code];
@@ -489,6 +700,96 @@ enum XML_Error contentProcessor(XML_Parser parser,
   return doContent(parser, 0, encoding, start, end, endPtr);
 }
 
+static
+enum XML_Error externalEntityInitProcessor(XML_Parser parser, /* Stage 1 for an external-entity parser: initializes the encoding, then hands over to stage 2. */
+					   const char *start,
+					   const char *end,
+					   const char **endPtr)
+{
+  enum XML_Error result = initializeEncoding(parser);
+  if (result != XML_ERROR_NONE)
+    return result;
+  processor = externalEntityInitProcessor2; /* later calls go straight to stage 2 */
+  return externalEntityInitProcessor2(parser, start, end, endPtr);
+}
+
+static
+enum XML_Error externalEntityInitProcessor2(XML_Parser parser, /* Stage 2: skips a leading XML_TOK_BOM token, if present, then hands over to stage 3. */
+					    const char *start,
+					    const char *end,
+					    const char **endPtr)
+{
+  const char *next;
+  int tok = XmlContentTok(encoding, start, end, &next);
+  switch (tok) {
+  case XML_TOK_BOM:
+    start = next; /* resume after the byte order mark */
+    break;
+  case XML_TOK_PARTIAL:
+    if (endPtr) { /* non-null endPtr: report nothing consumed; processor stays at this stage for the next call */
+      *endPtr = start;
+      return XML_ERROR_NONE;
+    }
+    eventPtr = start;
+    return XML_ERROR_UNCLOSED_TOKEN;
+  case XML_TOK_PARTIAL_CHAR:
+    if (endPtr) { /* same handling as XML_TOK_PARTIAL, with a different error when endPtr is 0 */
+      *endPtr = start;
+      return XML_ERROR_NONE;
+    }
+    eventPtr = start;
+    return XML_ERROR_PARTIAL_CHAR;
+  }
+  processor = externalEntityInitProcessor3; /* any other token is left for stage 3 to tokenize again from start */
+  return externalEntityInitProcessor3(parser, start, end, endPtr);
+}
+
+static
+enum XML_Error externalEntityInitProcessor3(XML_Parser parser, /* Stage 3: processes a leading XML_TOK_XML_DECL token, if present, then starts content parsing at tag level 1. */
+					    const char *start,
+					    const char *end,
+					    const char **endPtr)
+{
+  const char *next;
+  int tok = XmlContentTok(encoding, start, end, &next);
+  switch (tok) {
+  case XML_TOK_XML_DECL:
+    {
+      enum XML_Error result = processXmlDecl(parser, 1, start, next); /* 1 is the isGeneralTextEntity argument */
+      if (result != XML_ERROR_NONE)
+	return result;
+      start = next; /* content parsing resumes after the declaration */
+    }
+    break;
+  case XML_TOK_PARTIAL:
+    if (endPtr) { /* non-null endPtr: report nothing consumed; processor stays at this stage for the next call */
+      *endPtr = start;
+      return XML_ERROR_NONE;
+    }
+    eventPtr = start;
+    return XML_ERROR_UNCLOSED_TOKEN;
+  case XML_TOK_PARTIAL_CHAR:
+    if (endPtr) { /* same handling as XML_TOK_PARTIAL, with a different error when endPtr is 0 */
+      *endPtr = start;
+      return XML_ERROR_NONE;
+    }
+    eventPtr = start;
+    return XML_ERROR_PARTIAL_CHAR;
+  }
+  processor = externalEntityContentProcessor;
+  tagLevel = 1; /* matches the startTagLevel of 1 passed to doContent below */
+  return doContent(parser, 1, encoding, start, end, endPtr);
+}
+
+static
+enum XML_Error externalEntityContentProcessor(XML_Parser parser, /* Content processor for external entities: doContent with startTagLevel 1 (contentProcessor passes 0). */
+					      const char *start,
+					      const char *end,
+					      const char **endPtr)
+{
+  return doContent(parser, 1, encoding, start, end, endPtr);
+}
+
 static enum XML_Error
 doContent(XML_Parser parser,
 	  int startTagLevel,
@@ -497,89 +798,130 @@ doContent(XML_Parser parser,
 	  const char *end,
 	  const char **nextPtr)
 {
-  const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+  const ENCODING *internalEnc = XmlGetInternalEncoding();
+  const char *dummy;
+  const char **eventPP;
+  const char **eventEndPP;
+  if (enc == encoding) {
+    eventPP = &eventPtr;
+    *eventPP = s;
+    eventEndPP = &eventEndPtr;
+  }
+  else
+    eventPP = eventEndPP = &dummy;
   for (;;) {
     const char *next;
     int tok = XmlContentTok(enc, s, end, &next);
+    *eventEndPP = next;
     switch (tok) {
     case XML_TOK_TRAILING_CR:
+      if (nextPtr) {
+	*nextPtr = s;
+	return XML_ERROR_NONE;
+      }
+      *eventEndPP = end;
+      if (characterDataHandler) {
+	XML_Char c = XML_T('\n');
+	characterDataHandler(handlerArg, &c, 1);
+      }
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, end);
+      if (startTagLevel == 0)
+	return XML_ERROR_NO_ELEMENTS;
+      if (tagLevel != startTagLevel)
+	return XML_ERROR_ASYNC_ENTITY;
+      return XML_ERROR_NONE;
     case XML_TOK_NONE:
       if (nextPtr) {
 	*nextPtr = s;
 	return XML_ERROR_NONE;
       }
       if (startTagLevel > 0) {
-	if (tagLevel != startTagLevel) {
-	  errorPtr = s;
+	if (tagLevel != startTagLevel)
 	  return XML_ERROR_ASYNC_ENTITY;
-        }
 	return XML_ERROR_NONE;
       }
-      errorPtr = s;
       return XML_ERROR_NO_ELEMENTS;
     case XML_TOK_INVALID:
-      errorPtr = next;
+      *eventPP = next;
       return XML_ERROR_INVALID_TOKEN;
     case XML_TOK_PARTIAL:
       if (nextPtr) {
 	*nextPtr = s;
 	return XML_ERROR_NONE;
       }
-      errorPtr = s;
       return XML_ERROR_UNCLOSED_TOKEN;
     case XML_TOK_PARTIAL_CHAR:
       if (nextPtr) {
 	*nextPtr = s;
 	return XML_ERROR_NONE;
       }
-      errorPtr = s;
       return XML_ERROR_PARTIAL_CHAR;
     case XML_TOK_ENTITY_REF:
       {
-	const char *name = poolStoreString(&dtd.pool, enc,
-					   s + enc->minBytesPerChar,
-					   next - enc->minBytesPerChar);
+	const XML_Char *name;
 	ENTITY *entity;
+	XML_Char ch = XmlPredefinedEntityName(enc,
+					      s + enc->minBytesPerChar,
+					      next - enc->minBytesPerChar);
+	if (ch) {
+	  if (characterDataHandler)
+	    characterDataHandler(handlerArg, &ch, 1);
+	  else if (defaultHandler)
+	    reportDefault(parser, enc, s, next);
+	  break;
+	}
+	name = poolStoreString(&dtd.pool, enc,
+				s + enc->minBytesPerChar,
+				next - enc->minBytesPerChar);
 	if (!name)
 	  return XML_ERROR_NO_MEMORY;
 	entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
 	poolDiscard(&dtd.pool);
 	if (!entity) {
-	  if (dtd.complete || dtd.standalone) {
-	    errorPtr = s;
+	  if (dtd.complete || dtd.standalone)
 	    return XML_ERROR_UNDEFINED_ENTITY;
-	  }
+	  if (defaultHandler)
+	    reportDefault(parser, enc, s, next);
 	  break;
 	}
-	if (entity->magic) {
-	  if (characterDataHandler)
-	    characterDataHandler(userData, entity->textPtr, entity->textLen);
-	  break;
-	}
-	if (entity->open) {
-	  errorPtr = s;
+	if (entity->open)
 	  return XML_ERROR_RECURSIVE_ENTITY_REF;
-	}
-	if (entity->notation) {
-	  errorPtr = s;
+	if (entity->notation)
 	  return XML_ERROR_BINARY_ENTITY_REF;
-	}
 	if (entity) {
 	  if (entity->textPtr) {
 	    enum XML_Error result;
+	    if (defaultHandler) {
+	      reportDefault(parser, enc, s, next);
+	      break;
+	    }
+	    /* Protect against the possibility that somebody sets
+	       the defaultHandler from inside another handler. */
+	    *eventEndPP = *eventPP;
 	    entity->open = 1;
 	    result = doContent(parser,
 			       tagLevel,
-			       utf8,
-			       entity->textPtr,
-			       entity->textPtr + entity->textLen,
+			       internalEnc,
+			       (char *)entity->textPtr,
+			       (char *)(entity->textPtr + entity->textLen),
 			       0);
 	    entity->open = 0;
-	    if (result) {
-	      errorPtr = s;
+	    if (result)
 	      return result;
-	    }
 	  }
+	  else if (externalEntityRefHandler) {
+	    const XML_Char *openEntityNames;
+	    entity->open = 1;
+	    openEntityNames = getOpenEntityNames(parser);
+	    entity->open = 0;
+	    if (!openEntityNames)
+	      return XML_ERROR_NO_MEMORY;
+	    if (!externalEntityRefHandler(parser, openEntityNames, dtd.base, entity->systemId, entity->publicId))
+	      return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
+	  }
+	  else if (defaultHandler)
+	    reportDefault(parser, enc, s, next);
 	}
 	break;
       }
@@ -613,6 +955,7 @@ doContent(XML_Parser parser,
 	if (nextPtr) {
 	  if (tag->rawNameLength > tag->bufEnd - tag->buf) {
 	    int bufSize = tag->rawNameLength * 4;
+	    bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
 	    tag->buf = realloc(tag->buf, bufSize);
 	    if (!tag->buf)
 	      return XML_ERROR_NO_MEMORY;
@@ -624,18 +967,19 @@ doContent(XML_Parser parser,
 	++tagLevel;
 	if (startElementHandler) {
 	  enum XML_Error result;
-	  char *toPtr;
-	  const char *rawNameEnd = tag->rawName + tag->rawNameLength;
+	  XML_Char *toPtr;
 	  for (;;) {
+	    const char *rawNameEnd = tag->rawName + tag->rawNameLength;
 	    const char *fromPtr = tag->rawName;
 	    int bufSize;
-	    toPtr = tag->buf;
 	    if (nextPtr)
-	      toPtr += tag->rawNameLength;
+	      toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
+	    else
+	      toPtr = (XML_Char *)tag->buf;
 	    tag->name = toPtr;
-	    XmlConvert(enc, XML_UTF8_ENCODING,
+	    XmlConvert(enc,
 		       &fromPtr, rawNameEnd,
-	               &toPtr, tag->bufEnd - 1);
+		       (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
 	    if (fromPtr == rawNameEnd)
 	      break;
 	    bufSize = (tag->bufEnd - tag->buf) << 1;
@@ -643,16 +987,21 @@ doContent(XML_Parser parser,
 	    if (!tag->buf)
 	      return XML_ERROR_NO_MEMORY;
 	    tag->bufEnd = tag->buf + bufSize;
+	    if (nextPtr)
+	      tag->rawName = tag->buf;
 	  }
-	  *toPtr = 0;
+	  *toPtr = XML_T('\0');
 	  result = storeAtts(parser, enc, tag->name, s);
 	  if (result)
 	    return result;
-	  startElementHandler(userData, tag->name, (const char **)atts);
+	  startElementHandler(handlerArg, tag->name, (const XML_Char **)atts);
 	  poolClear(&tempPool);
 	}
-	else
+	else {
 	  tag->name = 0;
+	  if (defaultHandler)
+	    reportDefault(parser, enc, s, next);
+	}
 	break;
       }
     case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
@@ -665,9 +1014,9 @@ doContent(XML_Parser parser,
     case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
       if (startElementHandler || endElementHandler) {
 	const char *rawName = s + enc->minBytesPerChar;
-	const char *name = poolStoreString(&tempPool, enc, rawName,
-					   rawName
-					   + XmlNameLength(enc, rawName));
+	const XML_Char *name = poolStoreString(&tempPool, enc, rawName,
+					       rawName
+					       + XmlNameLength(enc, rawName));
 	if (!name)
 	  return XML_ERROR_NO_MEMORY;
 	poolFinish(&tempPool);
@@ -675,20 +1024,23 @@ doContent(XML_Parser parser,
 	  enum XML_Error result = storeAtts(parser, enc, name, s);
 	  if (result)
 	    return result;
-	  startElementHandler(userData, name, (const char **)atts);
+	  startElementHandler(handlerArg, name, (const XML_Char **)atts);
+	}
+	if (endElementHandler) {
+	  if (startElementHandler)
+	    *eventEndPP = *eventPP;
+	  endElementHandler(handlerArg, name);
 	}
-	if (endElementHandler)
-	  endElementHandler(userData, name);
 	poolClear(&tempPool);
       }
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, next);
       if (tagLevel == 0)
 	return epilogProcessor(parser, next, end, nextPtr);
       break;
     case XML_TOK_END_TAG:
-      if (tagLevel == startTagLevel) {
-        errorPtr = s;
+      if (tagLevel == startTagLevel)
         return XML_ERROR_ASYNC_ENTITY;
-      }
       else {
 	int len;
 	const char *rawName;
@@ -700,22 +1052,24 @@ doContent(XML_Parser parser,
 	len = XmlNameLength(enc, rawName);
 	if (len != tag->rawNameLength
 	    || memcmp(tag->rawName, rawName, len) != 0) {
-	  errorPtr = rawName;
+	  *eventPP = rawName;
 	  return XML_ERROR_TAG_MISMATCH;
 	}
 	--tagLevel;
 	if (endElementHandler) {
 	  if (tag->name)
-	    endElementHandler(userData, tag->name);
+	    endElementHandler(handlerArg, tag->name);
 	  else {
-	    const char *name = poolStoreString(&tempPool, enc, rawName,
-	                                       rawName + len);
+	    const XML_Char *name = poolStoreString(&tempPool, enc, rawName,
+	                                           rawName + len);
 	    if (!name)
-	    return XML_ERROR_NO_MEMORY;
-	    endElementHandler(userData, name);
+	      return XML_ERROR_NO_MEMORY;
+	    endElementHandler(handlerArg, name);
 	    poolClear(&tempPool);
 	  }
 	}
+	else if (defaultHandler)
+	  reportDefault(parser, enc, s, next);
 	if (tagLevel == 0)
 	  return epilogProcessor(parser, next, end, nextPtr);
       }
@@ -723,51 +1077,98 @@ doContent(XML_Parser parser,
     case XML_TOK_CHAR_REF:
       {
 	int n = XmlCharRefNumber(enc, s);
-	if (n < 0) {
-	  errorPtr = s;
+	if (n < 0)
 	  return XML_ERROR_BAD_CHAR_REF;
-	}
 	if (characterDataHandler) {
-	  char buf[XML_MAX_BYTES_PER_CHAR];
-	  characterDataHandler(userData, buf, XmlEncode(utf8, n, buf));
+	  XML_Char buf[XML_ENCODE_MAX];
+	  characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
 	}
+	else if (defaultHandler)
+	  reportDefault(parser, enc, s, next);
       }
       break;
     case XML_TOK_XML_DECL:
-      errorPtr = s;
       return XML_ERROR_MISPLACED_XML_PI;
     case XML_TOK_DATA_NEWLINE:
       if (characterDataHandler) {
-	char c = '\n';
-	characterDataHandler(userData, &c, 1);
+	XML_Char c = XML_T('\n');
+	characterDataHandler(handlerArg, &c, 1);
+      }
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, next);
+      break;
+    case XML_TOK_CDATA_SECT_OPEN:
+      {
+	enum XML_Error result;
+	if (characterDataHandler)
+  	  characterDataHandler(handlerArg, dataBuf, 0);
+	else if (defaultHandler)
+	  reportDefault(parser, enc, s, next);
+	result = doCdataSection(parser, enc, &next, end, nextPtr);
+	if (!next) {
+	  processor = cdataSectionProcessor;
+	  return result;
+	}
       }
       break;
-    case XML_TOK_CDATA_SECTION:
+    case XML_TOK_TRAILING_RSQB:
+      if (nextPtr) {
+	*nextPtr = s;
+	return XML_ERROR_NONE;
+      }
       if (characterDataHandler) {
-	const char *lim = next - enc->minBytesPerChar * 3;
-	s += enc->minBytesPerChar * 9;
-	do {
-	  char *dataPtr = dataBuf;
-	  XmlConvert(enc, XML_UTF8_ENCODING, &s, lim, &dataPtr, dataBufEnd);
-	  characterDataHandler(userData, dataBuf, dataPtr - dataBuf);
-	} while (s != lim);
+	if (MUST_CONVERT(enc, s)) {
+	  ICHAR *dataPtr = (ICHAR *)dataBuf;
+	  XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
+	  characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
+	}
+	else
+	  characterDataHandler(handlerArg,
+		  	       (XML_Char *)s,
+			       (XML_Char *)end - (XML_Char *)s);
       }
-      break;
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, end);
+      if (startTagLevel == 0) {
+        *eventPP = end;
+	return XML_ERROR_NO_ELEMENTS;
+      }
+      if (tagLevel != startTagLevel) {
+	*eventPP = end;
+	return XML_ERROR_ASYNC_ENTITY;
+      }
+      return XML_ERROR_NONE;
     case XML_TOK_DATA_CHARS:
       if (characterDataHandler) {
-	do {
-	  char *dataPtr = dataBuf;
-	  XmlConvert(enc, XML_UTF8_ENCODING, &s, next, &dataPtr, dataBufEnd);
-	  characterDataHandler(userData, dataBuf, dataPtr - dataBuf);
-	} while (s != next);
+	if (MUST_CONVERT(enc, s)) {
+	  for (;;) {
+	    ICHAR *dataPtr = (ICHAR *)dataBuf;
+	    XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+	    *eventEndPP = s;
+	    characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
+	    if (s == next)
+	      break;
+	    *eventPP = s;
+	  }
+	}
+	else
+	  characterDataHandler(handlerArg,
+			       (XML_Char *)s,
+			       (XML_Char *)next - (XML_Char *)s);
       }
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, next);
       break;
     case XML_TOK_PI:
       if (!reportProcessingInstruction(parser, enc, s, next))
 	return XML_ERROR_NO_MEMORY;
       break;
+    default:
+      if (defaultHandler)
+	reportDefault(parser, enc, s, next);
+      break;
     }
-    s = next;
+    *eventPP = s = next;
   }
   /* not reached */
 }
@@ -776,11 +1177,11 @@ doContent(XML_Parser parser,
 otherwise just check the attributes for well-formedness. */
 
 static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
-				const char *tagName, const char *s)
+				const XML_Char *tagName, const char *s)
 {
   ELEMENT_TYPE *elementType = 0;
   int nDefaultAtts = 0;
-  const char **appAtts = (const char **)atts;
+  const XML_Char **appAtts;
   int i;
   int n;
 
@@ -792,13 +1193,15 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
   
   n = XmlGetAttributes(enc, s, attsSize, atts);
   if (n + nDefaultAtts > attsSize) {
-    attsSize = 2*n;
+    int oldAttsSize = attsSize;
+    attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
     atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
     if (!atts)
       return XML_ERROR_NO_MEMORY;
-    if (n > attsSize)
+    if (n > oldAttsSize)
       XmlGetAttributes(enc, s, n, atts);
   }
+  appAtts = (const XML_Char **)atts;
   for (i = 0; i < n; i++) {
     ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
 					  atts[i].name
@@ -806,7 +1209,8 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
     if (!attId)
       return XML_ERROR_NO_MEMORY;
     if ((attId->name)[-1]) {
-      errorPtr = atts[i].name;
+      if (enc == encoding)
+	eventPtr = atts[i].name;
       return XML_ERROR_DUPLICATE_ATTRIBUTE;
     }
     (attId->name)[-1] = 1;
@@ -858,10 +1262,242 @@ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
     appAtts[i << 1] = 0;
   }
   while (i-- > 0)
-    ((char *)appAtts[i << 1])[-1] = 0;
+    ((XML_Char *)appAtts[i << 1])[-1] = 0;
   return XML_ERROR_NONE;
 }
 
+/* The idea here is to avoid using stack for each CDATA section when
+the whole file is parsed with one call. */
+
+static /* Processor that continues a CDATA section via doCdataSection. */
+enum XML_Error cdataSectionProcessor(XML_Parser parser,
+				     const char *start,
+			    	     const char *end,
+				     const char **endPtr)
+{
+  enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr); /* start comes back null if the section is still open */
+  if (start) { /* section closed: switch back to content parsing from the position after it */
+    processor = contentProcessor;
+    return contentProcessor(parser, start, end, endPtr);
+  }
+  return result; /* still inside the section (or an error): pass doCdataSection's result through */
+}
+
+/* startPtr gets set to non-null if the section is closed, and to null if
+the section is not yet closed. */
+
+static /* Reports the content of a CDATA section beginning at *startPtr, one token at a time. */
+enum XML_Error doCdataSection(XML_Parser parser,
+			      const ENCODING *enc,
+			      const char **startPtr,
+			      const char *end,
+			      const char **nextPtr)
+{
+  const char *s = *startPtr;
+  const char *dummy; /* sink for event-position writes when enc is not the parser's encoding */
+  const char **eventPP;
+  const char **eventEndPP;
+  if (enc == encoding) { /* only then are eventPtr/eventEndPtr updated */
+    eventPP = &eventPtr;
+    *eventPP = s;
+    eventEndPP = &eventEndPtr;
+  }
+  else
+    eventPP = eventEndPP = &dummy;
+  *startPtr = 0; /* stays null unless XML_TOK_CDATA_SECT_CLOSE is seen below */
+  for (;;) {
+    const char *next;
+    int tok = XmlCdataSectionTok(enc, s, end, &next);
+    *eventEndPP = next;
+    switch (tok) {
+    case XML_TOK_CDATA_SECT_CLOSE:
+      if (characterDataHandler)
+	characterDataHandler(handlerArg, dataBuf, 0); /* zero-length call; presumably signals the section boundary -- confirm */
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, next);
+      *startPtr = next; /* tells the caller where parsing resumes after the section */
+      return XML_ERROR_NONE;
+    case XML_TOK_DATA_NEWLINE:
+      if (characterDataHandler) {
+	XML_Char c = XML_T('\n'); /* the handler always receives a single '\n' for this token */
+	characterDataHandler(handlerArg, &c, 1);
+      }
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, next);
+      break;
+    case XML_TOK_DATA_CHARS:
+      if (characterDataHandler) {
+	if (MUST_CONVERT(enc, s)) { /* convert through dataBuf, one buffer-full per handler call */
+	  for (;;) {
+  	    ICHAR *dataPtr = (ICHAR *)dataBuf;
+	    XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+	    *eventEndPP = next; /* NOTE(review): the matching loop in doContent stores s here -- confirm next is intended */
+	    characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
+	    if (s == next) /* whole token consumed */
+	      break;
+	    *eventPP = s;
+	  }
+	}
+	else /* no conversion needed: hand the input range to the handler directly */
+	  characterDataHandler(handlerArg,
+		  	       (XML_Char *)s,
+			       (XML_Char *)next - (XML_Char *)s);
+      }
+      else if (defaultHandler)
+	reportDefault(parser, enc, s, next);
+      break;
+    case XML_TOK_INVALID:
+      *eventPP = next; /* the error position is next, not the token start */
+      return XML_ERROR_INVALID_TOKEN;
+    case XML_TOK_PARTIAL_CHAR:
+      if (nextPtr) { /* caller supplied nextPtr: report the resume position and return without error */
+	*nextPtr = s;
+	return XML_ERROR_NONE;
+      }
+      return XML_ERROR_PARTIAL_CHAR;
+    case XML_TOK_PARTIAL:
+    case XML_TOK_NONE:
+      if (nextPtr) { /* same as above: stop at s without an error */
+	*nextPtr = s;
+	return XML_ERROR_NONE;
+      }
+      return XML_ERROR_UNCLOSED_CDATA_SECTION;
+    default:
+      abort(); /* any other token value terminates the process */
+    }
+    *eventPP = s = next; /* advance to the next token */
+  }
+  /* not reached */
+}
+
+static enum XML_Error /* Initializes the parser's encoding from protocolEncodingName (which may be null). */
+initializeEncoding(XML_Parser parser)
+{
+  const char *s;
+#ifdef XML_UNICODE /* copy the XML_Char name into a char buffer for XmlInitEncoding */
+  char encodingBuf[128];
+  if (!protocolEncodingName)
+    s = 0;
+  else {
+    int i;
+    for (i = 0; protocolEncodingName[i]; i++) {
+      if (i == sizeof(encodingBuf) - 1 /* name does not fit, or has a character outside 0..0x7F */
+	  || protocolEncodingName[i] >= 0x80
+	  || protocolEncodingName[i] < 0) {
+	encodingBuf[0] = '\0'; /* make the copied name empty */
+	break;
+      }
+      encodingBuf[i] = (char)protocolEncodingName[i];
+    }
+    encodingBuf[i] = '\0';
+    s = encodingBuf;
+  }
+#else
+  s = protocolEncodingName;
+#endif
+  if (XmlInitEncoding(&initEncoding, &encoding, s)) /* nonzero result: nothing more to do */
+    return XML_ERROR_NONE;
+  return handleUnknownEncoding(parser, protocolEncodingName); /* otherwise try unknownEncodingHandler */
+}
+
+static enum XML_Error /* Parses the declaration in [s, next) and applies its encoding and standalone values. */
+processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
+	       const char *s, const char *next)
+{
+  const char *encodingName = 0;
+  const ENCODING *newEncoding = 0;
+  const char *version;
+  int standalone = -1; /* remains -1 unless XmlParseXmlDecl stores a value */
+  if (!XmlParseXmlDecl(isGeneralTextEntity,
+		       encoding,
+		       s,
+		       next,
+		       &eventPtr,
+		       &version,
+		       &encodingName,
+		       &newEncoding,
+		       &standalone))
+    return XML_ERROR_SYNTAX;
+  if (defaultHandler)
+    reportDefault(parser, encoding, s, next);
+  if (!protocolEncodingName) { /* the declared encoding is only applied when no protocol encoding name is set */
+    if (newEncoding) {
+      if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { /* refuse to switch to a different minimum character width */
+	eventPtr = encodingName;
+	return XML_ERROR_INCORRECT_ENCODING;
+      }
+      encoding = newEncoding;
+    }
+    else if (encodingName) { /* a name was declared but no ENCODING was returned for it */
+      enum XML_Error result;
+      const XML_Char *s = poolStoreString(&tempPool, /* NOTE: this s shadows the parameter s */
+					  encoding,
+					  encodingName,
+					  encodingName
+					  + XmlNameLength(encoding, encodingName));
+      if (!s)
+	return XML_ERROR_NO_MEMORY;
+      result = handleUnknownEncoding(parser, s);
+      poolDiscard(&tempPool); /* the stored name is only needed for the call above */
+      if (result == XML_ERROR_UNKNOWN_ENCODING)
+	eventPtr = encodingName; /* point the error at the encoding name */
+      return result;
+    }
+  }
+  if (!isGeneralTextEntity && standalone == 1) /* standalone is not recorded for general text entities */
+    dtd.standalone = 1;
+  return XML_ERROR_NONE;
+}
+
+static enum XML_Error /* Asks unknownEncodingHandler about encodingName and, on success, switches to the resulting encoding. */
+handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
+{
+  if (unknownEncodingHandler) {
+    XML_Encoding info;
+    int i;
+    for (i = 0; i < 256; i++) /* start from an all -1 map and null convert/data/release */
+      info.map[i] = -1;
+    info.convert = 0;
+    info.data = 0;
+    info.release = 0;
+    if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { /* nonzero return: build an ENCODING from info */
+      ENCODING *enc;
+      unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
+      if (!unknownEncodingMem) {
+	if (info.release) /* allocation failed: still give the handler's release callback its data */
+	  info.release(info.data);
+	return XML_ERROR_NO_MEMORY;
+      }
+      enc = XmlInitUnknownEncoding(unknownEncodingMem,
+				   info.map,
+				   info.convert,
+				   info.data);
+      if (enc) { /* success: remember data/release and make enc the current encoding */
+	unknownEncodingData = info.data;
+	unknownEncodingRelease = info.release;
+	encoding = enc;
+	return XML_ERROR_NONE;
+      }
+    } /* NOTE(review): if enc was null, unknownEncodingMem stays allocated here; presumably freed elsewhere -- confirm */
+    if (info.release) /* handler returned 0 or initialization failed */
+      info.release(info.data);
+  }
+  return XML_ERROR_UNKNOWN_ENCODING;
+}
+
+static enum XML_Error /* Initializes the encoding, then installs prologProcessor and runs it on the same input. */
+prologInitProcessor(XML_Parser parser,
+		    const char *s,
+		    const char *end,
+		    const char **nextPtr)
+{
+  enum XML_Error result = initializeEncoding(parser);
+  if (result != XML_ERROR_NONE) /* processor is left unchanged on failure */
+    return result;
+  processor = prologProcessor;
+  return prologProcessor(parser, s, end, nextPtr);
+}
+
 static enum XML_Error
 prologProcessor(XML_Parser parser,
 		const char *s,
@@ -878,7 +1514,7 @@ prologProcessor(XML_Parser parser,
       }
       switch (tok) {
       case XML_TOK_INVALID:
-	errorPtr = next;
+	eventPtr = next;
 	return XML_ERROR_INVALID_TOKEN;
       case XML_TOK_NONE:
 	return XML_ERROR_NO_ELEMENTS;
@@ -887,7 +1523,7 @@ prologProcessor(XML_Parser parser,
       case XML_TOK_PARTIAL_CHAR:
 	return XML_ERROR_PARTIAL_CHAR;
       case XML_TOK_TRAILING_CR:
-	errorPtr = s + encoding->minBytesPerChar;
+	eventPtr = s + encoding->minBytesPerChar;
 	return XML_ERROR_NO_ELEMENTS;
       default:
 	abort();
@@ -896,43 +1532,29 @@ prologProcessor(XML_Parser parser,
     switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
     case XML_ROLE_XML_DECL:
       {
-	const char *encodingName = 0;
-	const ENCODING *newEncoding = 0;
-	const char *version;
-	int standalone = -1;
-	if (!XmlParseXmlDecl(0,
-			     encoding,
-			     s,
-			     next,
-			     &errorPtr,
-			     &version,
-			     &encodingName,
-			     &newEncoding,
-			     &standalone))
-	  return XML_ERROR_SYNTAX;
-	if (newEncoding) {
-	  if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
-	    errorPtr = encodingName;
-	    return XML_ERROR_INCORRECT_ENCODING;
-	  }
-	  encoding = newEncoding;
-	}
-	else if (encodingName) {
-	  errorPtr = encodingName;
-	  return XML_ERROR_UNKNOWN_ENCODING;
-	}
-	if (standalone == 1)
-	  dtd.standalone = 1;
-	break;
+	enum XML_Error result = processXmlDecl(parser, 0, s, next);
+	if (result != XML_ERROR_NONE)
+	  return result;
       }
+      break;
     case XML_ROLE_DOCTYPE_SYSTEM_ID:
       hadExternalDoctype = 1;
       break;
     case XML_ROLE_DOCTYPE_PUBLIC_ID:
     case XML_ROLE_ENTITY_PUBLIC_ID:
-    case XML_ROLE_NOTATION_PUBLIC_ID:
-      if (!XmlIsPublicId(encoding, s, next, &errorPtr))
+      if (!XmlIsPublicId(encoding, s, next, &eventPtr))
 	return XML_ERROR_SYNTAX;
+      if (declEntity) {
+	XML_Char *tem = poolStoreString(&dtd.pool,
+	                                encoding,
+					s + encoding->minBytesPerChar,
+	  				next - encoding->minBytesPerChar);
+	if (!tem)
+	  return XML_ERROR_NO_MEMORY;
+	normalizePublicId(tem);
+	declEntity->publicId = tem;
+	poolFinish(&dtd.pool);
+      }
       break;
     case XML_ROLE_INSTANCE_START:
       processor = contentProcessor;
@@ -941,7 +1563,7 @@ prologProcessor(XML_Parser parser,
       return contentProcessor(parser, s, end, nextPtr);
     case XML_ROLE_ATTLIST_ELEMENT_NAME:
       {
-	const char *name = poolStoreString(&dtd.pool, encoding, s, next);
+	const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
 	if (!name)
 	  return XML_ERROR_NO_MEMORY;
 	declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
@@ -971,7 +1593,7 @@ prologProcessor(XML_Parser parser,
     case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
     case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
       {
-	const char *attVal;
+	const XML_Char *attVal;
 	enum XML_Error result
 	  = storeAttributeValue(parser, encoding, declAttributeIsCdata,
 				s + encoding->minBytesPerChar,
@@ -1000,6 +1622,7 @@ prologProcessor(XML_Parser parser,
 	  				       next - encoding->minBytesPerChar);
 	if (!declEntity->systemId)
 	  return XML_ERROR_NO_MEMORY;
+	declEntity->base = dtd.base;
 	poolFinish(&dtd.pool);
       }
       break;
@@ -1009,11 +1632,26 @@ prologProcessor(XML_Parser parser,
 	if (!declEntity->notation)
 	  return XML_ERROR_NO_MEMORY;
 	poolFinish(&dtd.pool);
+	if (unparsedEntityDeclHandler) {
+	  eventPtr = eventEndPtr = s;
+	  unparsedEntityDeclHandler(handlerArg,
+				    declEntity->name,
+				    declEntity->base,
+				    declEntity->systemId,
+				    declEntity->publicId,
+				    declEntity->notation);
+	}
+
       }
       break;
     case XML_ROLE_GENERAL_ENTITY_NAME:
       {
-	const char *name = poolStoreString(&dtd.pool, encoding, s, next);
+	const XML_Char *name;
+	if (XmlPredefinedEntityName(encoding, s, next)) {
+	  declEntity = 0;
+	  break;
+	}
+	name = poolStoreString(&dtd.pool, encoding, s, next);
 	if (!name)
 	  return XML_ERROR_NO_MEMORY;
 	if (dtd.complete) {
@@ -1036,8 +1674,61 @@ prologProcessor(XML_Parser parser,
     case XML_ROLE_PARAM_ENTITY_NAME:
       declEntity = 0;
       break;
+    case XML_ROLE_NOTATION_NAME:
+      declNotationPublicId = 0;
+      declNotationName = 0;
+      if (notationDeclHandler) {
+	declNotationName = poolStoreString(&tempPool, encoding, s, next);
+	if (!declNotationName)
+	  return XML_ERROR_NO_MEMORY;
+	poolFinish(&tempPool);
+      }
+      break;
+    case XML_ROLE_NOTATION_PUBLIC_ID:
+      if (!XmlIsPublicId(encoding, s, next, &eventPtr))
+	return XML_ERROR_SYNTAX;
+      if (declNotationName) {
+	XML_Char *tem = poolStoreString(&tempPool,
+	                                encoding,
+					s + encoding->minBytesPerChar,
+	  				next - encoding->minBytesPerChar);
+	if (!tem)
+	  return XML_ERROR_NO_MEMORY;
+	normalizePublicId(tem);
+	declNotationPublicId = tem;
+	poolFinish(&tempPool);
+      }
+      break;
+    case XML_ROLE_NOTATION_SYSTEM_ID:
+      if (declNotationName && notationDeclHandler) {
+	const XML_Char *systemId
+	  = poolStoreString(&tempPool, encoding,
+			    s + encoding->minBytesPerChar,
+	  		    next - encoding->minBytesPerChar);
+	if (!systemId)
+	  return XML_ERROR_NO_MEMORY;
+	eventPtr = eventEndPtr = s;
+	notationDeclHandler(handlerArg,
+			    declNotationName,
+			    dtd.base,
+			    systemId,
+			    declNotationPublicId);
+      }
+      poolClear(&tempPool);
+      break;
+    case XML_ROLE_NOTATION_NO_SYSTEM_ID:
+      if (declNotationPublicId && notationDeclHandler) {
+	eventPtr = eventEndPtr = s;
+	notationDeclHandler(handlerArg,
+			    declNotationName,
+			    dtd.base,
+			    0,
+			    declNotationPublicId);
+      }
+      poolClear(&tempPool);
+      break;
     case XML_ROLE_ERROR:
-      errorPtr = s;
+      eventPtr = s;
       switch (tok) {
       case XML_TOK_PARAM_ENTITY_REF:
 	return XML_ERROR_PARAM_ENTITY_REF;
@@ -1059,14 +1750,14 @@ prologProcessor(XML_Parser parser,
       break;
     case XML_ROLE_GROUP_SEQUENCE:
       if (groupConnector[prologState.level] == '|') {
-	errorPtr = s;
+	eventPtr = s;
 	return XML_ERROR_SYNTAX;
       }
       groupConnector[prologState.level] = ',';
       break;
     case XML_ROLE_GROUP_CHOICE:
       if (groupConnector[prologState.level] == ',') {
-	errorPtr = s;
+	eventPtr = s;
 	return XML_ERROR_SYNTAX;
       }
       groupConnector[prologState.level] = '|';
@@ -1077,12 +1768,26 @@ prologProcessor(XML_Parser parser,
     case XML_ROLE_NONE:
       switch (tok) {
       case XML_TOK_PI:
+	eventPtr = s;
+	eventEndPtr = next;
 	if (!reportProcessingInstruction(parser, encoding, s, next))
 	  return XML_ERROR_NO_MEMORY;
 	break;
       }
       break;
     }
+    if (defaultHandler) {
+      switch (tok) {
+      case XML_TOK_PI:
+      case XML_TOK_BOM:
+      case XML_TOK_XML_DECL:
+	break;
+      default:
+	eventPtr = s;
+	eventEndPtr = next;
+	reportDefault(parser, encoding, s, next);
+      }
+    }
     s = next;
   }
   /* not reached */
@@ -1095,44 +1800,50 @@ enum XML_Error epilogProcessor(XML_Parser parser,
 			       const char **nextPtr)
 {
   processor = epilogProcessor;
+  eventPtr = s;
   for (;;) {
     const char *next;
     int tok = XmlPrologTok(encoding, s, end, &next);
+    eventEndPtr = next;
     switch (tok) {
     case XML_TOK_TRAILING_CR:
+      if (defaultHandler) {
+	eventEndPtr = end;
+	reportDefault(parser, encoding, s, end);
+      }
+      /* fall through */
     case XML_TOK_NONE:
       if (nextPtr)
 	*nextPtr = end;
       return XML_ERROR_NONE;
     case XML_TOK_PROLOG_S:
     case XML_TOK_COMMENT:
+      if (defaultHandler)
+	reportDefault(parser, encoding, s, next);
       break;
     case XML_TOK_PI:
       if (!reportProcessingInstruction(parser, encoding, s, next))
 	return XML_ERROR_NO_MEMORY;
       break;
     case XML_TOK_INVALID:
-      errorPtr = next;
+      eventPtr = next;
       return XML_ERROR_INVALID_TOKEN;
     case XML_TOK_PARTIAL:
       if (nextPtr) {
 	*nextPtr = s;
 	return XML_ERROR_NONE;
       }
-      errorPtr = s;
       return XML_ERROR_UNCLOSED_TOKEN;
     case XML_TOK_PARTIAL_CHAR:
       if (nextPtr) {
 	*nextPtr = s;
 	return XML_ERROR_NONE;
       }
-      errorPtr = s;
       return XML_ERROR_PARTIAL_CHAR;
     default:
-      errorPtr = s;
       return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
     }
-    s = next;
+    eventPtr = s = next;
   }
 }
 
@@ -1153,9 +1864,9 @@ storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
   enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
   if (result)
     return result;
-  if (!isCdata && poolLength(pool) && poolLastByte(pool) == ' ')
+  if (!isCdata && poolLength(pool) && poolLastChar(pool) == XML_T(' '))
     poolChop(pool);
-  if (!poolAppendByte(pool, 0))
+  if (!poolAppendChar(pool, XML_T('\0')))
     return XML_ERROR_NO_MEMORY;
   return XML_ERROR_NONE;
 }
@@ -1165,7 +1876,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
 		     const char *ptr, const char *end,
 		     STRING_POOL *pool)
 {
-  const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+  const ENCODING *internalEnc = XmlGetInternalEncoding();
   for (;;) {
     const char *next;
     int tok = XmlAttributeValueTok(enc, ptr, end, &next);
@@ -1173,31 +1884,35 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
     case XML_TOK_NONE:
       return XML_ERROR_NONE;
     case XML_TOK_INVALID:
-      errorPtr = next;
+      if (enc == encoding)
+	eventPtr = next;
       return XML_ERROR_INVALID_TOKEN;
     case XML_TOK_PARTIAL:
-      errorPtr = ptr;
+      if (enc == encoding)
+	eventPtr = ptr;
       return XML_ERROR_INVALID_TOKEN;
     case XML_TOK_CHAR_REF:
       {
-	char buf[XML_MAX_BYTES_PER_CHAR];
+	XML_Char buf[XML_ENCODE_MAX];
 	int i;
 	int n = XmlCharRefNumber(enc, ptr);
 	if (n < 0) {
-	  errorPtr = ptr;
+	  if (enc == encoding)
+	    eventPtr = ptr;
       	  return XML_ERROR_BAD_CHAR_REF;
 	}
 	if (!isCdata
-	    && n == ' '
-	    && (poolLength(pool) == 0 || poolLastByte(pool) == ' '))
+	    && n == 0x20 /* space */
+	    && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' ')))
 	  break;
-	n = XmlEncode(utf8, n, buf);
+	n = XmlEncode(n, (ICHAR *)buf);
 	if (!n) {
-	  errorPtr = ptr;
+	  if (enc == encoding)
+	    eventPtr = ptr;
 	  return XML_ERROR_BAD_CHAR_REF;
 	}
 	for (i = 0; i < n; i++) {
-	  if (!poolAppendByte(pool, buf[i]))
+	  if (!poolAppendChar(pool, buf[i]))
 	    return XML_ERROR_NO_MEMORY;
 	}
       }
@@ -1212,55 +1927,60 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
       /* fall through */
     case XML_TOK_ATTRIBUTE_VALUE_S:
     case XML_TOK_DATA_NEWLINE:
-      if (!isCdata && (poolLength(pool) == 0 || poolLastByte(pool) == ' '))
+      if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' ')))
 	break;
-      if (!poolAppendByte(pool, ' '))
+      if (!poolAppendChar(pool, XML_T(' ')))
 	return XML_ERROR_NO_MEMORY;
       break;
     case XML_TOK_ENTITY_REF:
       {
-	const char *name = poolStoreString(&temp2Pool, enc,
-					   ptr + enc->minBytesPerChar,
-					   next - enc->minBytesPerChar);
+	const XML_Char *name;
 	ENTITY *entity;
+	XML_Char ch = XmlPredefinedEntityName(enc,
+					      ptr + enc->minBytesPerChar,
+					      next - enc->minBytesPerChar);
+	if (ch) {
+	  if (!poolAppendChar(pool, ch))
+  	    return XML_ERROR_NO_MEMORY;
+	  break;
+	}
+	name = poolStoreString(&temp2Pool, enc,
+			       ptr + enc->minBytesPerChar,
+			       next - enc->minBytesPerChar);
 	if (!name)
 	  return XML_ERROR_NO_MEMORY;
 	entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
 	poolDiscard(&temp2Pool);
 	if (!entity) {
 	  if (dtd.complete) {
-	    errorPtr = ptr;
+	    if (enc == encoding)
+	      eventPtr = ptr;
 	    return XML_ERROR_UNDEFINED_ENTITY;
 	  }
 	}
 	else if (entity->open) {
-	  errorPtr = ptr;
+	  if (enc == encoding)
+	    eventPtr = ptr;
 	  return XML_ERROR_RECURSIVE_ENTITY_REF;
 	}
 	else if (entity->notation) {
-	  errorPtr = ptr;
+	  if (enc == encoding)
+	    eventPtr = ptr;
 	  return XML_ERROR_BINARY_ENTITY_REF;
 	}
-	else if (entity->magic) {
-	  int i;
-	  for (i = 0; i < entity->textLen; i++)
-	    if (!poolAppendByte(pool, entity->textPtr[i]))
-	      return XML_ERROR_NO_MEMORY;
-	}
 	else if (!entity->textPtr) {
-	  errorPtr = ptr;
+	  if (enc == encoding)
+	    eventPtr = ptr;
   	  return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
 	}
 	else {
 	  enum XML_Error result;
-	  const char *textEnd = entity->textPtr + entity->textLen;
+	  const XML_Char *textEnd = entity->textPtr + entity->textLen;
 	  entity->open = 1;
-	  result = appendAttributeValue(parser, utf8, isCdata, entity->textPtr, textEnd, pool);
+	  result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
 	  entity->open = 0;
-	  if (result) {
-	    errorPtr = ptr;
+	  if (result)
 	    return result;
-	  }
 	}
       }
       break;
@@ -1277,7 +1997,7 @@ enum XML_Error storeEntityValue(XML_Parser parser,
 				const char *entityTextPtr,
 				const char *entityTextEnd)
 {
-  const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+  const ENCODING *internalEnc = XmlGetInternalEncoding();
   STRING_POOL *pool = &(dtd.pool);
   entityTextPtr += encoding->minBytesPerChar;
   entityTextEnd -= encoding->minBytesPerChar;
@@ -1286,7 +2006,7 @@ enum XML_Error storeEntityValue(XML_Parser parser,
     int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
     switch (tok) {
     case XML_TOK_PARAM_ENTITY_REF:
-      errorPtr = entityTextPtr;
+      eventPtr = entityTextPtr;
       return XML_ERROR_SYNTAX;
     case XML_TOK_NONE:
       if (declEntity) {
@@ -1308,20 +2028,20 @@ enum XML_Error storeEntityValue(XML_Parser parser,
     case XML_TOK_DATA_NEWLINE:
       if (pool->end == pool->ptr && !poolGrow(pool))
 	return XML_ERROR_NO_MEMORY;
-      *(pool->ptr)++ = '\n';
+      *(pool->ptr)++ = XML_T('\n');
       break;
     case XML_TOK_CHAR_REF:
       {
-	char buf[XML_MAX_BYTES_PER_CHAR];
+	XML_Char buf[XML_ENCODE_MAX];
 	int i;
 	int n = XmlCharRefNumber(encoding, entityTextPtr);
 	if (n < 0) {
-	  errorPtr = entityTextPtr;
+	  eventPtr = entityTextPtr;
 	  return XML_ERROR_BAD_CHAR_REF;
 	}
-	n = XmlEncode(utf8, n, buf);
+	n = XmlEncode(n, (ICHAR *)buf);
 	if (!n) {
-	  errorPtr = entityTextPtr;
+	  eventPtr = entityTextPtr;
 	  return XML_ERROR_BAD_CHAR_REF;
 	}
 	for (i = 0; i < n; i++) {
@@ -1332,10 +2052,10 @@ enum XML_Error storeEntityValue(XML_Parser parser,
       }
       break;
     case XML_TOK_PARTIAL:
-      errorPtr = entityTextPtr;
+      eventPtr = entityTextPtr;
       return XML_ERROR_INVALID_TOKEN;
     case XML_TOK_INVALID:
-      errorPtr = next;
+      eventPtr = next;
       return XML_ERROR_INVALID_TOKEN;
     default:
       abort();
@@ -1346,36 +2066,42 @@ enum XML_Error storeEntityValue(XML_Parser parser,
 }
 
 static void
-normalizeLines(char *s)
+normalizeLines(XML_Char *s) /* Rewrites s in place so that "\r\n" and a lone "\r" each become "\n". */
 {
-  char *p;
-  s = strchr(s, '\r');
-  if (!s)
-    return;
+  XML_Char *p; /* write position; never ahead of the read position s */
+  for (;; s++) { /* advance to the first '\r'; return with s untouched if there is none */
+    if (*s == XML_T('\0'))
+      return;
+    if (*s == XML_T('\r'))
+      break;
+  }
   p = s;
-  while (*s) {
-    if (*s == '\r') {
-      *p++ = '\n';
-      if (*++s == '\n')
+  do { /* *s is '\r' on entry, so at least one pass is needed */
+    if (*s == XML_T('\r')) {
+      *p++ = XML_T('\n');
+      if (*++s == XML_T('\n')) /* swallow the '\n' of a "\r\n" pair */
         s++;
     }
     else
       *p++ = *s++;
-  }
-  *p = '\0';
+  } while (*s);
+  *p = XML_T('\0'); /* terminate the (possibly shortened) string */
 }
 
 static int
 reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
 {
-  const char *target;
-  char *data;
+  const XML_Char *target;
+  XML_Char *data;
   const char *tem;
-  if (!processingInstructionHandler)
+  if (!processingInstructionHandler) {
+    if (defaultHandler)
+      reportDefault(parser, enc, start, end);
     return 1;
-  target = start + enc->minBytesPerChar * 2;
-  tem = target + XmlNameLength(enc, target);
-  target = poolStoreString(&tempPool, enc, target, tem);
+  }
+  start += enc->minBytesPerChar * 2;
+  tem = start + XmlNameLength(enc, start);
+  target = poolStoreString(&tempPool, enc, start, tem);
   if (!target)
     return 0;
   poolFinish(&tempPool);
@@ -1385,22 +2111,50 @@ reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *
   if (!data)
     return 0;
   normalizeLines(data);
-  processingInstructionHandler(userData, target, data);
+  processingInstructionHandler(handlerArg, target, data);
   poolClear(&tempPool);
   return 1;
 }
 
+/* Passes the bytes from s up to end to the default handler, converting them
+   with XmlConvert first when MUST_CONVERT says so. */
+static void
+reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
+{
+  if (!MUST_CONVERT(enc, s)) {
+    defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
+    return;
+  }
+  /* Convert through dataBuf, one buffer-load per handler call. */
+  do {
+    ICHAR *out = (ICHAR *)dataBuf;
+    int more;
+    XmlConvert(enc, &s, end, &out, (ICHAR *)dataBufEnd);
+    /* Event pointers move only between chunks read in the parser's own encoding. */
+    more = (s != end && enc == encoding);
+    if (more)
+      eventEndPtr = s;
+    defaultHandler(handlerArg, dataBuf, out - (ICHAR *)dataBuf);
+    if (more)
+      eventPtr = s;
+  } while (s != end);
+}
+
+
 static int
-defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const char *value)
+defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
 {
   DEFAULT_ATTRIBUTE *att;
   if (type->nDefaultAtts == type->allocDefaultAtts) {
-    if (type->allocDefaultAtts == 0)
+    if (type->allocDefaultAtts == 0) {
       type->allocDefaultAtts = 8;
-    else
+      type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
+    }
+    else {
       type->allocDefaultAtts *= 2;
-    type->defaultAtts = realloc(type->defaultAtts,
-				type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
+      type->defaultAtts = realloc(type->defaultAtts,
+				  type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
+    }
     if (!type->defaultAtts)
       return 0;
   }
@@ -1418,8 +2172,8 @@ static ATTRIBUTE_ID *
 getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
 {
   ATTRIBUTE_ID *id;
-  const char *name;
-  if (!poolAppendByte(&dtd.pool, 0))
+  const XML_Char *name;
+  if (!poolAppendChar(&dtd.pool, XML_T('\0')))
     return 0;
   name = poolStoreString(&dtd.pool, enc, start, end);
   if (!name)
@@ -1435,25 +2189,88 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const
   return id;
 }
 
+static
+const XML_Char *getOpenEntityNames(XML_Parser parser)
+{
+  /* Collects in tempPool the names of all general entities flagged open,
+     space-separated and 0-terminated.  Returns 0 if a pool append fails. */
+  HASH_TABLE_ITER iter;
+  ENTITY *ent;
+
+  hashTableIterInit(&iter, &(dtd.generalEntities));
+  while ((ent = (ENTITY *)hashTableIterNext(&iter)) != 0) {
+    const XML_Char *ch = ent->name;
+    if (!ent->open)
+      continue;
+    /* A separator is needed whenever the pool already holds something. */
+    if (poolLength(&tempPool) > 0 && !poolAppendChar(&tempPool, XML_T(' ')))
+      return 0;
+    while (*ch) {
+      if (!poolAppendChar(&tempPool, *ch))
+        return 0;
+      ch++;
+    }
+  }
+  return (poolAppendChar(&tempPool, XML_T('\0'))) ? tempPool.start : 0;
+}
+
+static
+int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames)
+{
+  /* Sets the open flag on each entity that lookup finds for a name in the list. */
+  const XML_Char *cur;
+  for (cur = openEntityNames; *openEntityNames != XML_T('\0'); ) {
+    ENTITY *ent;
+    if (*cur != XML_T(' ') && *cur != XML_T('\0')) {
+      if (!poolAppendChar(&tempPool, *cur))
+        return 0;
+      cur++;
+      continue;
+    }
+    if (!poolAppendChar(&tempPool, XML_T('\0')))
+      return 0;
+    ent = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
+    if (ent)
+      ent->open = 1;
+    if (*cur == XML_T(' '))
+      cur++;
+    openEntityNames = cur;
+    poolDiscard(&tempPool);
+  }
+  return 1;
+}
+
+
+static
+void normalizePublicId(XML_Char *publicId)
+{
+  /* Rewrites publicId in place: each run of space, '\r' and '\n' characters
+     becomes a single space, and leading and trailing runs are removed.
+     dst never gets ahead of src, so nothing unread is overwritten. */
+  XML_Char *dst = publicId;
+  const XML_Char *src;
+
+  for (src = publicId; *src != XML_T('\0'); src++) {
+    int isSpace = (*src == XML_T(' ') || *src == XML_T('\r') || *src == XML_T('\n'));
+    if (!isSpace)
+      *dst++ = *src;
+    else if (dst != publicId && dst[-1] != XML_T(' '))
+      *dst++ = XML_T(' ');
+  }
+  /* Runs were collapsed above, so at most one trailing space is left. */
+  if (dst != publicId && dst[-1] == XML_T(' '))
+    --dst;
+  *dst = XML_T('\0');
+}
+
 static int dtdInit(DTD *p)
 {
-  static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
-  static const char chars[] = { '<', '&', '>', '"', '\'' };
-  int i;
-
   poolInit(&(p->pool));
   hashTableInit(&(p->generalEntities));
-  for (i = 0; i < 5; i++) {
-    ENTITY *entity = (ENTITY *)lookup(&(p->generalEntities), names[i], sizeof(ENTITY));
-    if (!entity)
-      return 0;
-    entity->textPtr = chars + i;
-    entity->textLen = 1;
-    entity->magic = 1;
-  }
   hashTableInit(&(p->elementTypes));
   hashTableInit(&(p->attributeIds));
   p->complete = 1;
+  p->base = 0;  /* no base recorded yet; dtdCopy copies base only when it is non-zero */
   return 1;
 }
 
@@ -1465,7 +2282,8 @@ static void dtdDestroy(DTD *p)
     ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
     if (!e)
       break;
-    free(e->defaultAtts);
+    if (e->allocDefaultAtts != 0)
+      free(e->defaultAtts);
   }
   hashTableDestroy(&(p->generalEntities));
   hashTableDestroy(&(p->elementTypes));
@@ -1473,6 +2291,124 @@ static void dtdDestroy(DTD *p)
   poolDestroy(&(p->pool));
 }
 
+/* Do a deep copy of the DTD.  Return 0 for out of memory; non-zero otherwise.
+The new DTD has already been initialized.  Every string is copied into
+newDtd->pool and every table entry is created in newDtd's own hash tables. */
+
+static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
+{
+  HASH_TABLE_ITER iter;
+
+  if (oldDtd->base) {
+    const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
+    if (!tem)
+      return 0;
+    newDtd->base = tem;
+  }
+
+  /* Copy the attribute id table. */
+  hashTableIterInit(&iter, &(oldDtd->attributeIds));
+  for (;;) {
+    ATTRIBUTE_ID *newA;
+    const XML_Char *name;
+    const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
+    if (!oldA)
+      break;
+    /* Remember to allocate the scratch byte before the name. */
+    if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
+      return 0;
+    name = poolCopyString(&(newDtd->pool), oldA->name);
+    if (!name)
+      return 0;
+    ++name;
+    newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
+    if (!newA)
+      return 0;
+    newA->maybeTokenized = oldA->maybeTokenized;
+  }
+
+  /* Copy the element type table. */
+  hashTableIterInit(&iter, &(oldDtd->elementTypes));
+  for (;;) {
+    int i;
+    ELEMENT_TYPE *newE;
+    const XML_Char *name;
+    const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
+    if (!oldE)
+      break;
+    name = poolCopyString(&(newDtd->pool), oldE->name);
+    if (!name)
+      return 0;
+    newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
+    if (!newE)
+      return 0;
+    /* malloc(0) may return 0, so allocate only when there are defaults. */
+    if (oldE->nDefaultAtts) {
+      newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
+      if (!newE->defaultAtts)
+        return 0;
+    }
+    newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
+    for (i = 0; i < newE->nDefaultAtts; i++) {
+      newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
+      newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
+      newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
+      if (!newE->defaultAtts[i].value)
+        return 0;
+    }
+  }
+
+  /* Copy the entity table. */
+  hashTableIterInit(&iter, &(oldDtd->generalEntities));
+  for (;;) {
+    ENTITY *newE;
+    const XML_Char *name;
+    const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
+    if (!oldE)
+      break;
+    name = poolCopyString(&(newDtd->pool), oldE->name);
+    if (!name)
+      return 0;
+    newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
+    if (!newE)
+      return 0;
+    if (oldE->systemId) {
+      const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
+      if (!tem)
+        return 0;
+      newE->systemId = tem;
+      if (oldE->base) {
+        /* Reuse the DTD-level base copy when the old entity shared it. */
+        if (oldE->base == oldDtd->base)
+          newE->base = newDtd->base;
+        else {
+          tem = poolCopyString(&(newDtd->pool), oldE->base);
+          if (!tem)
+            return 0;
+          newE->base = tem;
+        }
+      }
+    }
+    else {
+      const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
+      if (!tem)
+        return 0;
+      newE->textPtr = tem;
+      newE->textLen = oldE->textLen;
+    }
+    if (oldE->notation) {
+      const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
+      if (!tem)
+        return 0;
+      newE->notation = tem;
+    }
+  }
+
+  newDtd->complete = oldDtd->complete;
+  newDtd->standalone = oldDtd->standalone;
+  return 1;
+}
+
 static
 void poolInit(STRING_POOL *pool)
 {
@@ -1526,13 +2462,13 @@ void poolDestroy(STRING_POOL *pool)
 }
 
 static
-char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
-		 const char *ptr, const char *end)
+XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
+		     const char *ptr, const char *end)
 {
   if (!pool->ptr && !poolGrow(pool))
     return 0;
   for (;;) {
-    XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end);
+    XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
     if (ptr == end)
       break;
     if (!poolGrow(pool))
@@ -1541,10 +2477,34 @@ char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
   return pool->start;
 }
 
+static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
+{
+  for (; poolAppendChar(pool, *s); s++)  /* the terminating 0 is appended too */
+    if (!*s) {
+      const XML_Char *copy = pool->start;
+      poolFinish(pool);
+      return copy;
+    }
+  return 0;
+}
+
+static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
+{
+  /* Appends exactly n characters of s (none when n <= 0); adds no terminator. */
+  int i;
+  if (!pool->ptr && !poolGrow(pool))
+    return 0;
+  for (i = 0; i < n; i++)
+    if (!poolAppendChar(pool, s[i]))
+      return 0;
+  s = pool->start;
+  poolFinish(pool);
+  return s;
+}
 
 static
-char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
-		      const char *ptr, const char *end)
+XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
+			  const char *ptr, const char *end)
 {
   if (!poolAppend(pool, enc, ptr, end))
     return 0;
@@ -1572,7 +2532,7 @@ int poolGrow(STRING_POOL *pool)
       pool->freeBlocks->next = pool->blocks;
       pool->blocks = pool->freeBlocks;
       pool->freeBlocks = tem;
-      memcpy(pool->blocks->s, pool->start, pool->end - pool->start);
+      memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
       pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
       pool->start = pool->blocks->s;
       pool->end = pool->start + pool->blocks->size;
@@ -1581,7 +2541,7 @@ int poolGrow(STRING_POOL *pool)
   }
   if (pool->blocks && pool->start == pool->blocks->s) {
     int blockSize = (pool->end - pool->start)*2;
-    pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize);
+    pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
     if (!pool->blocks)
       return 0;
     pool->blocks->size = blockSize;
@@ -1596,13 +2556,13 @@ int poolGrow(STRING_POOL *pool)
       blockSize = INIT_BLOCK_SIZE;
     else
       blockSize *= 2;
-    tem = malloc(offsetof(BLOCK, s) + blockSize);
+    tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
     if (!tem)
       return 0;
     tem->size = blockSize;
     tem->next = pool->blocks;
     pool->blocks = tem;
-    memcpy(tem->s, pool->start, pool->ptr - pool->start);
+    memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
     pool->ptr = tem->s + (pool->ptr - pool->start);
     pool->start = tem->s;
     pool->end = tem->s + blockSize;
diff --git a/modules/xml/expat/xmlparse/xmlparse.h b/modules/xml/expat/xmlparse/xmlparse.h
index 216ec6d07861..13d5885ca2ab 100644
--- a/modules/xml/expat/xmlparse/xmlparse.h
+++ b/modules/xml/expat/xmlparse/xmlparse.h
@@ -31,32 +31,199 @@ extern "C" {
 
 typedef void *XML_Parser;
 
-/* Constructs a new parser; encoding should be the name of the charset from
-the Content-Type header if the Content-Type is text/xml, or null otherwise. */
+#ifdef XML_UNICODE_WCHAR_T
 
-XML_Parser XMLPARSEAPI
-XML_ParserCreate(const char *encoding);
+/* XML_UNICODE_WCHAR_T will work only if sizeof(wchar_t) == 2 and wchar_t
+uses Unicode. */
+/* Information is UTF-16 encoded as wchar_ts */
+
+#ifndef XML_UNICODE
+#define XML_UNICODE
+#endif
+
+#include <stddef.h>
+typedef wchar_t XML_Char;
+typedef wchar_t XML_LChar;
+
+#else /* not XML_UNICODE_WCHAR_T */
+
+#ifdef XML_UNICODE
+
+/* Information is UTF-16 encoded as unsigned shorts */
+typedef unsigned short XML_Char;
+typedef char XML_LChar;
+
+#else /* not XML_UNICODE */
 
 /* Information is UTF-8 encoded. */
+typedef char XML_Char;
+typedef char XML_LChar;
 
-/* atts is array of name/value pairs, terminated by NULL;
-   names and values are '\0' terminated. */
+#endif /* not XML_UNICODE */
+
+#endif /* not XML_UNICODE_WCHAR_T */
+
+
+/* Constructs a new parser; encoding is the encoding specified by the external
+protocol or null if there is none specified. */
+
+XML_Parser XMLPARSEAPI
+XML_ParserCreate(const XML_Char *encoding);
+
+
+/* atts is array of name/value pairs, terminated by 0;
+   names and values are 0 terminated. */
 
 typedef void (*XML_StartElementHandler)(void *userData,
-					const char *name,
-					const char **atts);
+					const XML_Char *name,
+					const XML_Char **atts);
 
 typedef void (*XML_EndElementHandler)(void *userData,
-				      const char *name);
+				      const XML_Char *name);
 
+/* s is not 0 terminated. */
 typedef void (*XML_CharacterDataHandler)(void *userData,
-					 const char *s,
+					 const XML_Char *s,
 					 int len);
 
-/* target and data are '\0' terminated */
+/* target and data are 0 terminated */
 typedef void (*XML_ProcessingInstructionHandler)(void *userData,
-						 const char *target,
-						 const char *data);
+						 const XML_Char *target,
+						 const XML_Char *data);
+
+/* This is called for any characters in the XML document for
+which there is no applicable handler.  This includes both
+characters that are part of markup which is of a kind that is
+not reported (comments, markup declarations), or characters
+that are part of a construct which could be reported but
+for which no handler has been supplied. The characters are passed
+exactly as they were in the XML document except that
+they will be encoded in UTF-8 (or UTF-16 when XML_UNICODE is defined).  Line boundaries are not normalized.
+Note that a byte order mark character is not passed to the default handler.
+If a default handler is set, internal entity references
+are not expanded. There are no guarantees about
+how characters are divided between calls to the default handler:
+for example, a comment might be split between multiple calls. */
+
+typedef void (*XML_DefaultHandler)(void *userData,
+				   const XML_Char *s,
+				   int len);
+
+/* This is called for a declaration of an unparsed (NDATA)
+entity.  The base argument is whatever was set by XML_SetBase.
+The entityName, systemId and notationName arguments will never be null.
+The other arguments may be. */
+
+typedef void (*XML_UnparsedEntityDeclHandler)(void *userData,
+					      const XML_Char *entityName,
+					      const XML_Char *base,
+					      const XML_Char *systemId,
+					      const XML_Char *publicId,
+					      const XML_Char *notationName);
+
+/* This is called for a declaration of notation.
+The base argument is whatever was set by XML_SetBase.
+The notationName will never be null.  The other arguments can be. */
+
+typedef void (*XML_NotationDeclHandler)(void *userData,
+					const XML_Char *notationName,
+					const XML_Char *base,
+					const XML_Char *systemId,
+					const XML_Char *publicId);
+
+/* This is called for a reference to an external parsed general entity.
+The referenced entity is not automatically parsed.
+The application can parse it immediately or later using
+XML_ExternalEntityParserCreate.
+The parser argument is the parser parsing the entity containing the reference;
+it can be passed as the parser argument to XML_ExternalEntityParserCreate.
+The systemId argument is the system identifier as specified in the entity declaration;
+it will not be null.
+The base argument is the system identifier that should be used as the base for
+resolving systemId if systemId was relative; this is set by XML_SetBase;
+it may be null.
+The publicId argument is the public identifier as specified in the entity declaration,
+or null if none was specified; the whitespace in the public identifier
+will have been normalized as required by the XML spec.
+The openEntityNames argument is a space-separated list of the names of the entities
+that are open for the parse of this entity (including the name of the referenced
+entity); this can be passed as the openEntityNames argument to
+XML_ExternalEntityParserCreate; openEntityNames is valid only until the handler
+returns, so if the referenced entity is to be parsed later, it must be copied.
+The handler should return 0 if processing should not continue because of
+a fatal error in the handling of the external entity.
+In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
+error.
+Note that unlike other handlers the first argument is the parser, not userData. */
+
+typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser,
+					    const XML_Char *openEntityNames,
+					    const XML_Char *base,
+					    const XML_Char *systemId,
+					    const XML_Char *publicId);
+
+/* This structure is filled in by the XML_UnknownEncodingHandler
+to provide information to the parser about encodings that are unknown
+to the parser.
+The map[b] member gives information about byte sequences
+whose first byte is b.
+If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c.
+If map[b] is -1, then the byte sequence is malformed.
+If map[b] is -n, where n >= 2, then b is the first byte of an n-byte
+sequence that encodes a single Unicode scalar value.
+The data member will be passed as the first argument to the convert function.
+The convert function is used to convert multibyte sequences;
+s will point to a n-byte sequence where map[(unsigned char)*s] == -n.
+The convert function must return the Unicode scalar value
+represented by this byte sequence or -1 if the byte sequence is malformed.
+The convert function may be null if the encoding is a single-byte encoding,
+that is if map[b] >= -1 for all bytes b.
+When the parser is finished with the encoding, then if release is not null,
+it will call release passing it the data member;
+once release has been called, the convert function will not be called again.
+
+Expat places certain restrictions on the encodings that are supported
+using this mechanism.
+
+1. Every ASCII character that can appear in a well-formed XML document,
+other than the characters
+
+  $@\^`{}~
+
+must be represented by a single byte, and that byte must be the
+same byte that represents that character in ASCII.
+
+2. No character may require more than 4 bytes to encode.
+
+3. All characters encoded must have Unicode scalar values <= 0xFFFF,
+(ie characters that would be encoded by surrogates in UTF-16
+are not allowed).  Note that this restriction doesn't apply to
+the built-in support for UTF-8 and UTF-16.
+
+4. No Unicode character may be encoded by more than one distinct sequence
+of bytes. */
+
+typedef struct {
+  int map[256];  /* per first byte b: >= 0 scalar value, -1 malformed, -n (n >= 2) n-byte sequence */
+  void *data;  /* passed as the first argument to convert and to release */
+  int (*convert)(void *data, const char *s);  /* scalar value of a multibyte sequence, or -1; may be null */
+  void (*release)(void *data);  /* called, if not null, when the parser is finished with the encoding */
+} XML_Encoding;
+
+/* This is called for an encoding that is unknown to the parser.
+The encodingHandlerData argument is that which was passed as the
+second argument to XML_SetUnknownEncodingHandler.
+The name argument gives the name of the encoding as specified in
+the encoding declaration.
+If the callback can provide information about the encoding,
+it must fill in the XML_Encoding structure, and return 1.
+Otherwise it must return 0.
+If info does not describe a suitable encoding,
+then the parser will return an XML_ERROR_UNKNOWN_ENCODING error. */
+
+typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData,
+					  const XML_Char *name,
+					  XML_Encoding *info);
 
 void XMLPARSEAPI
 XML_SetElementHandler(XML_Parser parser,
@@ -71,10 +238,62 @@ void XMLPARSEAPI
 XML_SetProcessingInstructionHandler(XML_Parser parser,
 				    XML_ProcessingInstructionHandler handler);
 
+void XMLPARSEAPI
+XML_SetDefaultHandler(XML_Parser parser,
+		      XML_DefaultHandler handler);
+
+void XMLPARSEAPI
+XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
+				 XML_UnparsedEntityDeclHandler handler);
+
+void XMLPARSEAPI
+XML_SetNotationDeclHandler(XML_Parser parser,
+			   XML_NotationDeclHandler handler);
+
+void XMLPARSEAPI
+XML_SetExternalEntityRefHandler(XML_Parser parser,
+				XML_ExternalEntityRefHandler handler);
+
+void XMLPARSEAPI
+XML_SetUnknownEncodingHandler(XML_Parser parser,
+			      XML_UnknownEncodingHandler handler,
+			      void *encodingHandlerData);
+
+/* This can be called within a handler for a start element, end element,
+processing instruction or character data.  It causes the corresponding
+markup to be passed to the default handler.
+Within the expansion of an internal entity, nothing will be passed
+to the default handler, although this usually will not happen since
+setting a default handler inhibits expansion of internal entities. */
+void XMLPARSEAPI XML_DefaultCurrent(XML_Parser parser);
+
 /* This value is passed as the userData argument to callbacks. */
 void XMLPARSEAPI
 XML_SetUserData(XML_Parser parser, void *userData);
 
+/* Returns the last value set by XML_SetUserData or null. */
+#define XML_GetUserData(parser) (*(void **)(parser))
+
+/* If this function is called, then the parser will be passed
+as the first argument to callbacks instead of userData.
+The userData will still be accessible using XML_GetUserData. */
+
+void XMLPARSEAPI
+XML_UseParserAsHandlerArg(XML_Parser parser);
+
+/* Sets the base to be used for resolving relative URIs in system identifiers in
+declarations.  Resolving relative identifiers is left to the application:
+this value will be passed through as the base argument to the
+XML_ExternalEntityRefHandler, XML_NotationDeclHandler
+and XML_UnparsedEntityDeclHandler. The base argument will be copied.
+Returns zero if out of memory, non-zero otherwise. */
+
+int XMLPARSEAPI
+XML_SetBase(XML_Parser parser, const XML_Char *base);
+
+const XML_Char XMLPARSEAPI *
+XML_GetBase(XML_Parser parser);
+
 /* Parses some input. Returns 0 if a fatal error is detected.
 The last call to XML_Parse must have isFinal true;
 len may be zero for this call (or any other). */
@@ -87,8 +306,20 @@ XML_GetBuffer(XML_Parser parser, int len);
 int XMLPARSEAPI
 XML_ParseBuffer(XML_Parser parser, int len, int isFinal);
 
-/* If XML_Parser or XML_ParseEnd have returned 0, then XML_GetError*
-returns information about the error. */
+/* Creates an XML_Parser object that can parse an external general entity;
+openEntityNames is a space-separated list of the names of the entities that are open
+for the parse of this entity (including the name of this one);
+encoding is the externally specified encoding,
+or null if there is no externally specified encoding.
+This can be called at any point after the first call to an ExternalEntityRefHandler
+so long as the parser has not yet been freed.
+The new parser is completely independent and may safely be used in a separate thread.
+The handlers and userData are initialized from the parser argument.
+Returns 0 if out of memory.  Otherwise returns a new XML_Parser object. */
+XML_Parser XMLPARSEAPI
+XML_ExternalEntityParserCreate(XML_Parser parser,
+			       const XML_Char *openEntityNames,
+			       const XML_Char *encoding);
 
 enum XML_Error {
   XML_ERROR_NONE,
@@ -110,19 +341,39 @@ enum XML_Error {
   XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF,
   XML_ERROR_MISPLACED_XML_PI,
   XML_ERROR_UNKNOWN_ENCODING,
-  XML_ERROR_INCORRECT_ENCODING
+  XML_ERROR_INCORRECT_ENCODING,
+  XML_ERROR_UNCLOSED_CDATA_SECTION,
+  XML_ERROR_EXTERNAL_ENTITY_HANDLING
 };
 
-int XMLPARSEAPI XML_GetErrorCode(XML_Parser parser);
-int XMLPARSEAPI XML_GetErrorLineNumber(XML_Parser parser);
-int XMLPARSEAPI XML_GetErrorColumnNumber(XML_Parser parser);
-long XMLPARSEAPI XML_GetErrorByteIndex(XML_Parser parser);
+/* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode
+returns information about the error. */
 
+enum XML_Error XMLPARSEAPI XML_GetErrorCode(XML_Parser parser);
+
+/* These functions return information about the current parse location.
+They may be called when XML_Parse or XML_ParseBuffer return 0;
+in this case the location is the location of the character at which
+the error was detected.
+They may also be called from any other callback called to report
+some parse event; in this case the location is the location of the first
+of the sequence of characters that generated the event. */
+
+int XMLPARSEAPI XML_GetCurrentLineNumber(XML_Parser parser);
+int XMLPARSEAPI XML_GetCurrentColumnNumber(XML_Parser parser);
+long XMLPARSEAPI XML_GetCurrentByteIndex(XML_Parser parser);
+
+/* For backwards compatibility with previous versions. */
+#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
+#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
+#define XML_GetErrorByteIndex XML_GetCurrentByteIndex
+
+/* Frees memory used by the parser. */
 void XMLPARSEAPI
 XML_ParserFree(XML_Parser parser);
 
-const char XMLPARSEAPI *
-XML_ErrorString(int code);
+/* Returns a string describing the error. */
+const XML_LChar XMLPARSEAPI *XML_ErrorString(int code);
 
 #ifdef __cplusplus
 }
diff --git a/modules/xml/expat/xmltok/xmlrole.c b/modules/xml/expat/xmltok/xmlrole.c
index 340147ee6926..72be89bff8fa 100644
--- a/modules/xml/expat/xmltok/xmlrole.c
+++ b/modules/xml/expat/xmltok/xmlrole.c
@@ -594,7 +594,7 @@ int notation4(PROLOG_STATE *state,
     return XML_ROLE_NOTATION_SYSTEM_ID;
   case XML_TOK_DECL_CLOSE:
     state->handler = internalSubset;
-    return XML_ROLE_NONE;
+    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
   }
   return syntaxError(state);
 }
diff --git a/modules/xml/expat/xmltok/xmlrole.h b/modules/xml/expat/xmltok/xmlrole.h
index 4f4655f821c8..ecbcc26dff05 100644
--- a/modules/xml/expat/xmltok/xmlrole.h
+++ b/modules/xml/expat/xmltok/xmlrole.h
@@ -44,6 +44,7 @@ enum {
   XML_ROLE_ENTITY_NOTATION_NAME,
   XML_ROLE_NOTATION_NAME,
   XML_ROLE_NOTATION_SYSTEM_ID,
+  XML_ROLE_NOTATION_NO_SYSTEM_ID,
   XML_ROLE_NOTATION_PUBLIC_ID,
   XML_ROLE_ATTRIBUTE_NAME,
   XML_ROLE_ATTRIBUTE_TYPE_CDATA,
diff --git a/modules/xml/expat/xmltok/xmltok.c b/modules/xml/expat/xmltok/xmltok.c
index aba5e55c78ee..bcd06eb9f99f 100644
--- a/modules/xml/expat/xmltok/xmltok.c
+++ b/modules/xml/expat/xmltok/xmltok.c
@@ -23,7 +23,7 @@ Contributor(s):
 #include "nametab.h"
 
 #define VTABLE1 \
-  { PREFIX(prologTok), PREFIX(contentTok) }, \
+  { PREFIX(prologTok), PREFIX(contentTok), PREFIX(cdataSectionTok) }, \
   { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
   PREFIX(sameName), \
   PREFIX(nameMatchesAscii), \
@@ -31,14 +31,11 @@ Contributor(s):
   PREFIX(skipS), \
   PREFIX(getAtts), \
   PREFIX(charRefNumber), \
+  PREFIX(predefinedEntityName), \
   PREFIX(updatePosition), \
   PREFIX(isPublicId)
 
-#define VTABLE2 \
-  PREFIX(encode), \
-  { PREFIX(toUtf8) }
-
-#define VTABLE VTABLE1, VTABLE2
+#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
 
 #define UCS2_GET_NAMING(pages, hi, lo) \
    (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
@@ -81,11 +78,79 @@ We need 8 bits to index into pages, 3 bits to add to that index and
 
 #define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)
 
+static
+int isNever(const ENCODING *enc, const char *p)
+{
+  return 0;  /* constant "no"; aliased below as utf8_isName4, utf8_isNmstrt4 and utf8_isInvalid2 */
+}
+
+static
+int utf8_isName2(const ENCODING *enc, const char *p)
+{
+  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);  /* namePages lookup; enc is not used */
+}
+
+static
+int utf8_isName3(const ENCODING *enc, const char *p)
+{
+  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);  /* namePages lookup; enc is not used */
+}
+
+#define utf8_isName4 isNever
+
+static
+int utf8_isNmstrt2(const ENCODING *enc, const char *p)
+{
+  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);  /* nmstrtPages lookup; enc is not used */
+}
+
+static
+int utf8_isNmstrt3(const ENCODING *enc, const char *p)
+{
+  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);  /* nmstrtPages lookup; enc is not used */
+}
+
+#define utf8_isNmstrt4 isNever
+
+#define utf8_isInvalid2 isNever
+
+static
+int utf8_isInvalid3(const ENCODING *enc, const char *p)
+{
+  return UTF8_INVALID3((const unsigned char *)p);  /* bytes are read as unsigned; enc is not used */
+}
+
+static
+int utf8_isInvalid4(const ENCODING *enc, const char *p)
+{
+  return UTF8_INVALID4((const unsigned char *)p);  /* true for lead byte 0xF4 with (p[1] & 0x30) != 0 */
+}
+
 struct normal_encoding {
   ENCODING enc;
   unsigned char type[256];
+  int (*isName2)(const ENCODING *, const char *);  /* isNameN: reached through IS_NAME_CHAR(enc, p, N) */
+  int (*isName3)(const ENCODING *, const char *);
+  int (*isName4)(const ENCODING *, const char *);
+  int (*isNmstrt2)(const ENCODING *, const char *);  /* isNmstrtN: reached through IS_NMSTRT_CHAR(enc, p, N) */
+  int (*isNmstrt3)(const ENCODING *, const char *);
+  int (*isNmstrt4)(const ENCODING *, const char *);
+  int (*isInvalid2)(const ENCODING *, const char *);  /* isInvalidN: reached through IS_INVALID_CHAR(enc, p, N) */
+  int (*isInvalid3)(const ENCODING *, const char *);
+  int (*isInvalid4)(const ENCODING *, const char *);
 };
 
+#define NORMAL_VTABLE(E) /* the nine E-prefixed callbacks, in struct normal_encoding member order */ \
+ E ## isName2, \
+ E ## isName3, \
+ E ## isName4, \
+ E ## isNmstrt2, \
+ E ## isNmstrt3, \
+ E ## isNmstrt4, \
+ E ## isInvalid2, \
+ E ## isInvalid3, \
+ E ## isInvalid4
+ 
 static int checkCharRefNumber(int);
 
 #include "xmltok_impl.h"
@@ -95,12 +160,16 @@ static int checkCharRefNumber(int);
 #define BYTE_TYPE(enc, p) \
   (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
 #define BYTE_TO_ASCII(enc, p) (*p)
-#define IS_NAME_CHAR(enc, p, n) UTF8_GET_NAMING(namePages, p, n)
-#define IS_NMSTRT_CHAR(enc, p, n) UTF8_GET_NAMING(nmstrtPages, p, n)
+
+#define IS_NAME_CHAR(enc, p, n) \
+ (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
+#define IS_NMSTRT_CHAR(enc, p, n) \
+ (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
 #define IS_INVALID_CHAR(enc, p, n) \
-((n) == 3 \
-  ? UTF8_INVALID3((const unsigned char *)(p)) \
-  : ((n) == 4 ? UTF8_INVALID4((const unsigned char *)(p)) : 0))
+ (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))
+
+#define IS_NAME_CHAR_MINBPC(enc, p) (0)
+#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
 
 /* c is an ASCII character */
 #define CHAR_MATCHES(enc, p, c) (*(p) == c)
@@ -113,51 +182,18 @@ static int checkCharRefNumber(int);
 #undef BYTE_TO_ASCII
 #undef CHAR_MATCHES
 #undef IS_NAME_CHAR
+#undef IS_NAME_CHAR_MINBPC
 #undef IS_NMSTRT_CHAR
+#undef IS_NMSTRT_CHAR_MINBPC
 #undef IS_INVALID_CHAR
 
-enum {
-  /* cvalN is value of masked first byte of N byte sequence */
-  cval1 = 0x00,
-  cval2 = 0xc0,
-  cval3 = 0xe0,
-  cval4 = 0xf0,
-  /* minN is minimum legal resulting value for N byte sequence */
-  min2 = 0x80,
-  min3 = 0x800,
-  min4 = 0x10000
+enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
+  UTF8_cval1 = 0x00,
+  UTF8_cval2 = 0xc0,
+  UTF8_cval3 = 0xe0,
+  UTF8_cval4 = 0xf0
 };
 
-static
-int utf8_encode(const ENCODING *enc, int c, char *buf)
-{
-  if (c < 0)
-    return 0;
-  if (c < min2) {
-    buf[0] = (c | cval1);
-    return 1;
-  }
-  if (c < min3) {
-    buf[0] = ((c >> 6) | cval2);
-    buf[1] = ((c & 0x3f) | 0x80);
-    return 2;
-  }
-  if (c < min4) {
-    buf[0] = ((c >> 12) | cval3);
-    buf[1] = (((c >> 6) & 0x3f) | 0x80);
-    buf[2] = ((c & 0x3f) | 0x80);
-    return 3;
-  }
-  if (c < 0x110000) {
-    buf[0] = ((c >> 18) | cval4);
-    buf[1] = (((c >> 12) & 0x3f) | 0x80);
-    buf[2] = (((c >> 6) & 0x3f) | 0x80);
-    buf[3] = ((c & 0x3f) | 0x80);
-    return 4;
-  }
-  return 0;
-}
-
 static
 void utf8_toUtf8(const ENCODING *enc,
 		 const char **fromP, const char *fromLim,
@@ -177,34 +213,63 @@ void utf8_toUtf8(const ENCODING *enc,
   *toP = to;
 }
 
+static  /* UTF-8 -> UTF-16: stops when input or output is exhausted, then writes back *fromP and *toP. */
+void utf8_toUtf16(const ENCODING *enc,
+		  const char **fromP, const char *fromLim,
+		  unsigned short **toP, const unsigned short *toLim)
+{
+  unsigned short *to = *toP;
+  const char *from = *fromP;
+  while (from != fromLim && to != toLim) {
+    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
+    case BT_LEAD2:
+      *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
+      from += 2;
+      break;
+    case BT_LEAD3:
+      *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
+      from += 3;
+      break;
+    case BT_LEAD4:  /* value above 0xFFFF: stored as a surrogate pair, so two output units are needed */
+      {
+	unsigned long n;
+	if (to + 1 == toLim)
+	  goto done;  /* only one slot left; a plain break would leave just the switch and loop forever */
+	n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
+	n -= 0x10000;
+	*to++ = (unsigned short)((n >> 10) | 0xD800);
+	*to++ = (unsigned short)((n & 0x3FF) | 0xDC00);
+	from += 4;
+      }
+      break;
+    default:
+      *to++ = *from++;
+      break;
+    }
+  }
+done:
+  *fromP = from;
+  *toP = to;
+}
+
 static const struct normal_encoding utf8_encoding = {
-  { VTABLE1, utf8_encode, { utf8_toUtf8 }, 1 },
+  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
   {
 #include "asciitab.h"
 #include "utf8tab.h"
-  }
+  },
+  NORMAL_VTABLE(utf8_)
 };
 
 static const struct normal_encoding internal_utf8_encoding = {
-  { VTABLE1, utf8_encode, { utf8_toUtf8 }, 1 },
+  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
   {
 #include "iasciitab.h"
 #include "utf8tab.h"
-  }
+  },
+  NORMAL_VTABLE(utf8_)
 };
 
-static
-int latin1_encode(const ENCODING *enc, int c, char *buf)
-{
-  if (c < 0)
-    return 0;
-  if (c <= 0xFF) {
-    buf[0] = (char)c;
-    return 1;
-  }
-  return 0;
-}
-
 static
 void latin1_toUtf8(const ENCODING *enc,
 		   const char **fromP, const char *fromLim,
@@ -218,7 +283,7 @@ void latin1_toUtf8(const ENCODING *enc,
     if (c & 0x80) {
       if (toLim - *toP < 2)
 	break;
-      *(*toP)++ = ((c >> 6) | cval2);
+      *(*toP)++ = ((c >> 6) | UTF8_cval2);
       *(*toP)++ = ((c & 0x3f) | 0x80);
       (*fromP)++;
     }
@@ -230,15 +295,39 @@ void latin1_toUtf8(const ENCODING *enc,
   }
 }
 
+static  /* Widens each input byte to one 16-bit unit until input or output runs out. */
+void latin1_toUtf16(const ENCODING *enc,
+		    const char **fromP, const char *fromLim,
+		    unsigned short **toP, const unsigned short *toLim)
+{
+  for (; *fromP != fromLim && *toP != toLim; ++*fromP, ++*toP)
+    **toP = (unsigned char)**fromP;
+}
+
 static const struct normal_encoding latin1_encoding = {
-  { VTABLE1, latin1_encode, { latin1_toUtf8 }, 1 },
+  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
   {
 #include "asciitab.h"
 #include "latin1tab.h"
   }
 };
 
-#define latin1tab (latin1_encoding.type)
+static  /* Copies bytes verbatim until input or output runs out. */
+void ascii_toUtf8(const ENCODING *enc,
+		  const char **fromP, const char *fromLim,
+		  char **toP, const char *toLim)
+{
+  for (; *fromP != fromLim && *toP != toLim; ++*fromP, ++*toP)
+    **toP = **fromP;
+}
+
+static const struct normal_encoding ascii_encoding = {  /* selected for the encoding name "US-ASCII" */
+  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },  /* trailing fields: minBytesPerChar, isUtf8, isUtf16 */
+  {
+#include "asciitab.h"
+/* Entries not supplied by asciitab.h stay zero-initialized; BT_NONXML == 0 */
+  }
+};
 
 #undef PREFIX
 
@@ -260,25 +349,6 @@ static int unicode_byte_type(char hi, char lo)
   return BT_NONASCII;
 }
 
-#define DEFINE_UTF16_ENCODE \
-static \
-int PREFIX(encode)(const ENCODING *enc, int charNum, char *buf) \
-{ \
-  if (charNum < 0) \
-    return 0; \
-  if (charNum < 0x10000) { \
-    SET2(buf, charNum); \
-    return 2; \
-  } \
-  if (charNum < 0x110000) { \
-    charNum -= 0x10000; \
-    SET2(buf, (charNum >> 10) + 0xD800); \
-    SET2(buf + 2, (charNum & 0x3FF) + 0xDC00); \
-    return 4; \
-  } \
-  return 0; \
-}
-
 #define DEFINE_UTF16_TO_UTF8 \
 static \
 void PREFIX(toUtf8)(const ENCODING *enc, \
@@ -308,7 +378,7 @@ void PREFIX(toUtf8)(const ENCODING *enc, \
         *fromP = from; \
 	return; \
       } \
-      *(*toP)++ = ((lo >> 6) | (hi << 2) |  cval2); \
+      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
       *(*toP)++ = ((lo & 0x3f) | 0x80); \
       break; \
     default: \
@@ -317,7 +387,7 @@ void PREFIX(toUtf8)(const ENCODING *enc, \
 	return; \
       } \
       /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
-      *(*toP)++ = ((hi >> 4) | cval3); \
+      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
       *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
       *(*toP)++ = ((lo & 0x3f) | 0x80); \
       break; \
@@ -327,7 +397,7 @@ void PREFIX(toUtf8)(const ENCODING *enc, \
 	return; \
       } \
       plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
-      *(*toP)++ = ((plane >> 2) | cval4); \
+      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
       *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
       from += 2; \
       lo2 = GET_LO(from); \
@@ -342,15 +412,33 @@ void PREFIX(toUtf8)(const ENCODING *enc, \
   *fromP = from; \
 }
 
+#define DEFINE_UTF16_TO_UTF16 /* expands to PREFIX(toUtf16); each output unit is (GET_HI << 8) | GET_LO of two input bytes */ \
+static \
+void PREFIX(toUtf16)(const ENCODING *enc, \
+		     const char **fromP, const char *fromLim, \
+		     unsigned short **toP, const unsigned short *toLim) \
+{ \
+  /* Avoid copying first half only of surrogate: when the input will not all fit and its last unit has a high byte in 0xD8..0xDF, drop that unit */ \
+  if (fromLim - *fromP > ((toLim - *toP) << 1) \
+      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
+    fromLim -= 2; \
+  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
+    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
+}
+
 #define PREFIX(ident) little2_ ## ident
 #define MINBPC 2
 #define BYTE_TYPE(enc, p) \
- ((p)[1] == 0 ? latin1tab[(unsigned char)*(p)] : unicode_byte_type((p)[1], (p)[0]))
+ ((p)[1] == 0 \
+  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
+  : unicode_byte_type((p)[1], (p)[0]))
 #define BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
 #define CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
-#define IS_NAME_CHAR(enc, p, n) \
+#define IS_NAME_CHAR(enc, p, n) (0)
+#define IS_NAME_CHAR_MINBPC(enc, p) \
   UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
-#define IS_NMSTRT_CHAR(enc, p, n) \
+#define IS_NMSTRT_CHAR(enc, p, n) (0)
+#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])
 
 #include "xmltok_impl.c"
@@ -360,8 +448,8 @@ void PREFIX(toUtf8)(const ENCODING *enc, \
 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
 
-DEFINE_UTF16_ENCODE
 DEFINE_UTF16_TO_UTF8
+DEFINE_UTF16_TO_UTF16
 
 #undef SET2
 #undef GET_LO
@@ -371,10 +459,32 @@ DEFINE_UTF16_TO_UTF8
 #undef BYTE_TO_ASCII
 #undef CHAR_MATCHES
 #undef IS_NAME_CHAR
+#undef IS_NAME_CHAR_MINBPC
 #undef IS_NMSTRT_CHAR
+#undef IS_NMSTRT_CHAR_MINBPC
 #undef IS_INVALID_CHAR
 
-static const struct encoding little2_encoding = { VTABLE, 2 };
+static const struct normal_encoding little2_encoding = { 
+  { VTABLE, 2, 0,
+#if BYTE_ORDER == 12
+    1
+#else
+    0
+#endif
+  },
+#include "asciitab.h"
+#include "latin1tab.h"
+};
+
+#if BYTE_ORDER != 21  /* XmlGetUtf16InternalEncoding never returns this table when BYTE_ORDER == 21 */
+
+static const struct normal_encoding internal_little2_encoding = { /* like little2_encoding, but built on iasciitab.h */
+  { VTABLE, 2, 0, 1 },  /* trailing fields: minBytesPerChar, isUtf8, isUtf16 */
+#include "iasciitab.h"
+#include "latin1tab.h"
+};
+
+#endif /* BYTE_ORDER != 21 */
 
 #undef PREFIX
 
@@ -382,12 +492,16 @@ static const struct encoding little2_encoding = { VTABLE, 2 };
 #define MINBPC 2
 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
 #define BYTE_TYPE(enc, p) \
- ((p)[0] == 0 ? latin1tab[(unsigned char)(p)[1]] : unicode_byte_type((p)[0], (p)[1]))
+ ((p)[0] == 0 \
+  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
+  : unicode_byte_type((p)[0], (p)[1]))
 #define BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
 #define CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
-#define IS_NAME_CHAR(enc, p, n) \
+#define IS_NAME_CHAR(enc, p, n) 0
+#define IS_NAME_CHAR_MINBPC(enc, p) \
   UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
-#define IS_NMSTRT_CHAR(enc, p, n) \
+#define IS_NMSTRT_CHAR(enc, p, n) (0)
+#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
   UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])
 
 #include "xmltok_impl.c"
@@ -397,8 +511,8 @@ static const struct encoding little2_encoding = { VTABLE, 2 };
 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
 
-DEFINE_UTF16_ENCODE
 DEFINE_UTF16_TO_UTF8
+DEFINE_UTF16_TO_UTF16
 
 #undef SET2
 #undef GET_LO
@@ -408,10 +522,32 @@ DEFINE_UTF16_TO_UTF8
 #undef BYTE_TO_ASCII
 #undef CHAR_MATCHES
 #undef IS_NAME_CHAR
+#undef IS_NAME_CHAR_MINBPC
 #undef IS_NMSTRT_CHAR
+#undef IS_NMSTRT_CHAR_MINBPC
 #undef IS_INVALID_CHAR
 
-static const struct encoding big2_encoding = { VTABLE, 2 };
+static const struct normal_encoding big2_encoding = {
+  { VTABLE, 2, 0,
+#if BYTE_ORDER == 21
+  1
+#else
+  0
+#endif
+  },
+#include "asciitab.h"
+#include "latin1tab.h"
+};
+
+#if BYTE_ORDER != 12  /* XmlGetUtf16InternalEncoding never returns this table when BYTE_ORDER == 12 */
+
+static const struct normal_encoding internal_big2_encoding = {  /* like big2_encoding, but built on iasciitab.h */
+  { VTABLE, 2, 0, 1 },  /* trailing fields: minBytesPerChar, isUtf8, isUtf16 */
+#include "iasciitab.h"
+#include "latin1tab.h"
+};
+
+#endif /* BYTE_ORDER != 12 */
 
 #undef PREFIX
 
@@ -454,18 +590,18 @@ int initScan(const ENCODING *enc, int state, const char *ptr, const char *end,
   else {
     switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
     case 0x003C:
-      *encPtr = &big2_encoding;
+      *encPtr = &big2_encoding.enc;
       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
     case 0xFEFF:
       *nextTokPtr = ptr + 2;
-      *encPtr = &big2_encoding;
+      *encPtr = &big2_encoding.enc;
       return XML_TOK_BOM;
     case 0x3C00:
-      *encPtr = &little2_encoding;
+      *encPtr = &little2_encoding.enc;
       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
     case 0xFFFE:
       *nextTokPtr = ptr + 2;
-      *encPtr = &little2_encoding;
+      *encPtr = &little2_encoding.enc;
       return XML_TOK_BOM;
     }
   }
@@ -494,13 +630,21 @@ void initUpdatePosition(const ENCODING *enc, const char *ptr,
   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
 }
 
-const ENCODING *XmlGetInternalEncoding(int e)
+const ENCODING *XmlGetUtf8InternalEncoding()
 {
-  switch (e) {
-  case XML_UTF8_ENCODING:
-    return &internal_utf8_encoding.enc;
-  }
-  return 0;
+  return &internal_utf8_encoding.enc;
+}
+
+const ENCODING *XmlGetUtf16InternalEncoding()  /* returns the internal little2 or big2 encoding, chosen by byte order */
+{
+#if BYTE_ORDER == 12  /* 12 selects the little2 table */
+  return &internal_little2_encoding.enc;
+#elif BYTE_ORDER == 21  /* 21 selects the big2 table */
+  return &internal_big2_encoding.enc;
+#else
+  const short n = 1;  /* BYTE_ORDER gives no answer: probe at run time; first byte nonzero means low byte stored first */
+  return *(const char *)&n ? &internal_little2_encoding.enc : &internal_big2_encoding.enc;
+#endif
 }
 
 int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
@@ -514,6 +658,10 @@ int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
       *encPtr = &utf8_encoding.enc;
       return 1;
     }
+    if (streqci(name, "US-ASCII")) {
+      *encPtr = &ascii_encoding.enc;
+      return 1;
+    }
     if (!streqci(name, "UTF-16"))
       return 0;
   }
@@ -531,7 +679,7 @@ int toAscii(const ENCODING *enc, const char *ptr, const char *end)
 {
   char buf[1];
   char *p = buf;
-  XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &p, p + 1);
+  XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
   if (p == buf)
     return -1;
   else
@@ -641,7 +789,7 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e
   char buf[ENCODING_MAX];
   char *p = buf;
   int i;
-  XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &p, p + ENCODING_MAX - 1);
+  XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
   if (ptr != end)
     return 0;
   *p = 0;
@@ -653,11 +801,13 @@ const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *e
     return &utf8_encoding.enc;
   if (streqci(buf, "ISO-8859-1"))
     return &latin1_encoding.enc;
+  if (streqci(buf, "US-ASCII"))
+    return &ascii_encoding.enc;
   if (streqci(buf, "UTF-16")) {
     static const unsigned short n = 1;
     if (enc->minBytesPerChar == 2)
       return enc;
-    return &big2_encoding;
+    return &big2_encoding.enc;
   }
   return 0;  
 }
@@ -757,3 +907,229 @@ int checkCharRefNumber(int result)
   return result;
 }
 
+/* Encode character number c as UTF-8 into buf, which must have room for
+   XML_UTF8_ENCODE_MAX bytes.  Returns the number of bytes stored, or 0
+   when c is negative or not below 0x110000. */
+int XmlUtf8Encode(int c, char *buf)
+{
+  /* Smallest values needing 2, 3 and 4 bytes, then the first value too large. */
+  enum { two = 0x80, three = 0x800, four = 0x10000, limit = 0x110000 };
+  int len;
+
+  if (c < 0 || c >= limit)
+    return 0;
+  len = c < two ? 1 : c < three ? 2 : c < four ? 3 : 4;
+  switch (len) {
+  case 1:
+    buf[0] = (c | UTF8_cval1);
+    break;
+  case 2:
+    buf[0] = ((c >> 6) | UTF8_cval2);
+    buf[1] = ((c & 0x3f) | 0x80);
+    break;
+  case 3:
+    buf[0] = ((c >> 12) | UTF8_cval3);
+    buf[1] = (((c >> 6) & 0x3f) | 0x80);
+    buf[2] = ((c & 0x3f) | 0x80);
+    break;
+  default:
+    buf[0] = ((c >> 18) | UTF8_cval4);
+    buf[1] = (((c >> 12) & 0x3f) | 0x80);
+    buf[2] = (((c >> 6) & 0x3f) | 0x80);
+    buf[3] = ((c & 0x3f) | 0x80);
+    break;
+  }
+
+  return len;
+}
+
+/* Encode charNum as UTF-16 into buf; returns the number of 16-bit units
+   stored (2 means a surrogate pair), or 0 if charNum is outside 0..0x10FFFF. */
+int XmlUtf16Encode(int charNum, unsigned short *buf)
+{
+  if (charNum < 0 || charNum >= 0x110000)
+    return 0;
+  if (charNum >= 0x10000) {
+    const int offset = charNum - 0x10000;
+    buf[0] = 0xD800 + (offset >> 10);
+    buf[1] = 0xDC00 + (offset & 0x3FF);
+    return 2;
+  }
+  /* Values below 0x10000 are stored as a single unit. */
+  buf[0] = charNum;
+  return 1;
+}
+
+struct unknown_encoding {  /* state for an encoding described by a caller-supplied table */
+  struct normal_encoding normal;  /* first member: ENCODING pointers handed out are cast back to this struct */
+  int (*convert)(void *userData, const char *p);  /* returns the character number for the multibyte sequence at p */
+  void *userData;  /* passed unchanged as the first argument of convert */
+  unsigned short utf16[256];  /* per-byte UTF-16 unit; 0 means "ask convert" */
+  char utf8[256][4];  /* per byte: [0] is the UTF-8 length (0 means "ask convert"), followed by the bytes */
+};
+
+int XmlSizeOfUnknownEncoding()  /* size in bytes of the object XmlInitUnknownEncoding fills in */
+{
+  return (int)sizeof(struct unknown_encoding);
+}
+
+static  /* Name-character test for a multibyte sequence, decoded through the convert callback. */
+int unknown_isName(const ENCODING *enc, const char *p)
+{
+  const struct unknown_encoding *ue = (const struct unknown_encoding *)enc;
+  const int ch = ue->convert(ue->userData, p);
+  if ((ch & ~0xFFFF) != 0)  /* outside 0..0xFFFF: never a name character here */
+    return 0;
+  return UCS2_GET_NAMING(namePages, ch >> 8, ch & 0xFF);
+}
+
+static  /* Name-start-character test for a multibyte sequence, decoded through the convert callback. */
+int unknown_isNmstrt(const ENCODING *enc, const char *p)
+{
+  const struct unknown_encoding *ue = (const struct unknown_encoding *)enc;
+  const int ch = ue->convert(ue->userData, p);
+  if ((ch & ~0xFFFF) != 0)  /* outside 0..0xFFFF: never a name-start character here */
+    return 0;
+  return UCS2_GET_NAMING(nmstrtPages, ch >> 8, ch & 0xFF);
+}
+
+static  /* Nonzero when the decoded character is outside 0..0xFFFF or is rejected by checkCharRefNumber. */
+int unknown_isInvalid(const ENCODING *enc, const char *p)
+{
+  const struct unknown_encoding *ue = (const struct unknown_encoding *)enc;
+  const int ch = ue->convert(ue->userData, p);
+  return (ch & ~0xFFFF) != 0 || checkCharRefNumber(ch) < 0;
+}
+
+static  /* Converts text in a table-described encoding to UTF-8; stops at end of input or when the next character does not fit. */
+void unknown_toUtf8(const ENCODING *enc,
+		    const char **fromP, const char *fromLim,
+		    char **toP, const char *toLim)
+{
+  char buf[XML_UTF8_ENCODE_MAX];  /* scratch for characters obtained from convert */
+  for (;;) {
+    const char *utf8;
+    int n;
+    if (*fromP == fromLim)
+      break;
+    utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
+    n = *utf8++;  /* first table byte is the UTF-8 length; 0 means no precomputed mapping */
+    if (n == 0) {
+      int c = ((const struct unknown_encoding *)enc)
+	      ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
+      n = XmlUtf8Encode(c, buf);
+      if (n > toLim - *toP)
+	break;
+      utf8 = buf;
+      *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
+	         - (BT_LEAD2 - 2);  /* the byte type gives the sequence length: BT_LEAD2 advances by 2 */
+    }
+    else {
+      if (n > toLim - *toP)
+	break;
+      (*fromP)++;
+    }
+    do {  /* copy the n UTF-8 bytes selected above */
+      *(*toP)++ = *utf8++;
+    } while (--n != 0);
+  }
+}
+
+static  /* Converts text in a table-described encoding to UTF-16, one output unit per input character. */
+void unknown_toUtf16(const ENCODING *enc,
+		     const char **fromP, const char *fromLim,
+		     unsigned short **toP, const unsigned short *toLim)
+{
+  while (*fromP != fromLim && *toP != toLim) {
+    unsigned short c
+      = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
+    if (c == 0) {  /* 0 means no precomputed mapping: ask convert */
+      c = (unsigned short)((const struct unknown_encoding *)enc)
+	   ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
+      *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
+	         - (BT_LEAD2 - 2);  /* the byte type gives the sequence length: BT_LEAD2 advances by 2 */
+    }
+    else
+      (*fromP)++;
+    *(*toP)++ = c;
+  }
+}
+
+ENCODING *  /* Builds an encoding in mem from a 256-entry table; returns 0 when the table is rejected. */
+XmlInitUnknownEncoding(void *mem,
+		       int *table,
+		       int (*convert)(void *userData, const char *p),
+		       void *userData)
+{
+  int i;
+  struct unknown_encoding *e = mem;
+  for (i = 0; i < sizeof(struct normal_encoding); i++)  /* start from a byte-wise copy of latin1_encoding */
+    ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
+  for (i = 0; i < 128; i++)  /* bytes below 128 typed other than BT_OTHER/BT_NONXML must map to themselves */
+    if (latin1_encoding.type[i] != BT_OTHER
+        && latin1_encoding.type[i] != BT_NONXML
+	&& table[i] != i)
+      return 0;
+  for (i = 0; i < 256; i++) {
+    int c = table[i];
+    if (c == -1) {  /* -1: the byte is malformed */
+      e->normal.type[i] = BT_MALFORM;
+      /* This shouldn't really get used. */
+      e->utf16[i] = 0xFFFF;
+      e->utf8[i][0] = 1;
+      e->utf8[i][1] = 0;
+    }
+    else if (c < 0) {  /* -2..-4: lead byte of a 2..4 byte sequence */
+      if (c < -4)
+	return 0;
+      e->normal.type[i] = BT_LEAD2 - (c + 2);
+      e->utf8[i][0] = 0;  /* zero length and zero unit send the converters to convert */
+      e->utf16[i] = 0;
+    }
+    else if (c < 0x80) {
+      if (latin1_encoding.type[c] != BT_OTHER
+	  && latin1_encoding.type[c] != BT_NONXML
+	  && c != i)
+	return 0;
+      e->normal.type[i] = latin1_encoding.type[c];
+      e->utf8[i][0] = 1;
+      e->utf8[i][1] = (char)c;
+      e->utf16[i] = c == 0 ? 0xFFFF : c;  /* 0 is reserved as the "ask convert" marker */
+    }
+    else if (checkCharRefNumber(c) < 0) {
+      e->normal.type[i] = BT_NONXML;
+      /* This shouldn't really get used. */
+      e->utf16[i] = 0xFFFF;
+      e->utf8[i][0] = 1;
+      e->utf8[i][1] = 0;
+    }
+    else {
+      if (c > 0xFFFF)  /* a single byte may only map into 0..0xFFFF */
+	return 0;
+      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
+	e->normal.type[i] = BT_NMSTRT;
+      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
+	e->normal.type[i] = BT_NAME;
+      else
+	e->normal.type[i] = BT_OTHER;
+      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);  /* length byte, then the encoded bytes */
+      e->utf16[i] = c;
+    }
+  }
+  e->userData = userData;
+  e->convert = convert;
+  if (convert) {  /* the multibyte character tests go through the callback */
+    e->normal.isName2 = unknown_isName;
+    e->normal.isName3 = unknown_isName;
+    e->normal.isName4 = unknown_isName;
+    e->normal.isNmstrt2 = unknown_isNmstrt;
+    e->normal.isNmstrt3 = unknown_isNmstrt;
+    e->normal.isNmstrt4 = unknown_isNmstrt;
+    e->normal.isInvalid2 = unknown_isInvalid;
+    e->normal.isInvalid3 = unknown_isInvalid;
+    e->normal.isInvalid4 = unknown_isInvalid;
+  }
+  e->normal.enc.utf8Convert = unknown_toUtf8;
+  e->normal.enc.utf16Convert = unknown_toUtf16;
+  return &(e->normal.enc);
+}
diff --git a/modules/xml/expat/xmltok/xmltok.h b/modules/xml/expat/xmltok/xmltok.h
index d1fa5af5c3e2..6d0b91dff2ce 100644
--- a/modules/xml/expat/xmltok/xmltok.h
+++ b/modules/xml/expat/xmltok/xmltok.h
@@ -29,6 +29,9 @@ extern "C" {
 #define XMLTOKAPI /* as nothing */
 #endif
 
+/* The following token may be returned by XmlContentTok */
+#define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of
+                                    illegal ]]> sequence */
 /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
 #define XML_TOK_NONE -4    /* The string to be scanned is empty */
 #define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
@@ -38,7 +41,7 @@ extern "C" {
 #define XML_TOK_INVALID 0
 
 /* The following tokens are returned by XmlContentTok; some are also
-  returned by XmlAttributeValueTok and XmlEntityTok */
+  returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */
 
 #define XML_TOK_START_TAG_WITH_ATTS 1
 #define XML_TOK_START_TAG_NO_ATTS 2
@@ -47,7 +50,7 @@ extern "C" {
 #define XML_TOK_END_TAG 5
 #define XML_TOK_DATA_CHARS 6
 #define XML_TOK_DATA_NEWLINE 7
-#define XML_TOK_CDATA_SECTION 8
+#define XML_TOK_CDATA_SECT_OPEN 8
 #define XML_TOK_ENTITY_REF 9
 #define XML_TOK_CHAR_REF 10     /* numeric character reference */
 
@@ -85,25 +88,25 @@ extern "C" {
 #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
 #define XML_TOK_COMMA 38
 
-  /* The following tokens is returned only by XmlAttributeValueTok */
+/* The following token is returned only by XmlAttributeValueTok */
 #define XML_TOK_ATTRIBUTE_VALUE_S 39
 
-#define XML_N_STATES 2
+/* The following token is returned only by XmlCdataSectionTok */
+#define XML_TOK_CDATA_SECT_CLOSE 40
+
+#define XML_N_STATES 3
 #define XML_PROLOG_STATE 0
 #define XML_CONTENT_STATE 1
+#define XML_CDATA_SECTION_STATE 2
 
 #define XML_N_LITERAL_TYPES 2
 #define XML_ATTRIBUTE_VALUE_LITERAL 0
 #define XML_ENTITY_VALUE_LITERAL 1
 
-#define XML_N_INTERNAL_ENCODINGS 1
-#define XML_UTF8_ENCODING 0
-#if 0
-#define XML_UTF16_ENCODING 1
-#define XML_UCS4_ENCODING 2
-#endif
-
-#define XML_MAX_BYTES_PER_CHAR 4
+/* The size of the buffer passed to XmlUtf8Encode must be at least this. */
+#define XML_UTF8_ENCODE_MAX 4
+/* The size of the buffer passed to XmlUtf16Encode must be at least this. */
+#define XML_UTF16_ENCODE_MAX 2
 
 typedef struct position {
   /* first line and first column are 0 not 1 */
@@ -139,21 +142,26 @@ struct encoding {
   int (*getAtts)(const ENCODING *enc, const char *ptr,
 	         int attsMax, ATTRIBUTE *atts);
   int (*charRefNumber)(const ENCODING *enc, const char *ptr);
+  int (*predefinedEntityName)(const ENCODING *, const char *, const char *);
   void (*updatePosition)(const ENCODING *,
 			 const char *ptr,
 			 const char *end,
 			 POSITION *);
   int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
 		    const char **badPtr);
-  int (*encode)(const ENCODING *enc,
-		int charNum,
-		char *buf);
-  void (*convert[XML_N_INTERNAL_ENCODINGS])(const ENCODING *enc,
-					    const char **fromP,
-					    const char *fromLim,
-					    char **toP,
-					    const char *toLim);
+  void (*utf8Convert)(const ENCODING *enc,
+		      const char **fromP,
+		      const char *fromLim,
+		      char **toP,
+		      const char *toLim);
+  void (*utf16Convert)(const ENCODING *enc,
+		       const char **fromP,
+		       const char *fromLim,
+		       unsigned short **toP,
+		       const unsigned short *toLim);
   int minBytesPerChar;
+  char isUtf8;
+  char isUtf16;
 };
 
 /*
@@ -186,6 +194,9 @@ literals, comments and processing instructions.
 #define XmlContentTok(enc, ptr, end, nextTokPtr) \
    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
 
+#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
+   XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
+
 /* This is used for performing a 2nd-level tokenization on
 the content of a literal that has already been returned by XmlTok. */ 
 
@@ -215,17 +226,20 @@ the content of a literal that has already been returned by XmlTok. */
 #define XmlCharRefNumber(enc, ptr) \
   (((enc)->charRefNumber)(enc, ptr))
 
+#define XmlPredefinedEntityName(enc, ptr, end) \
+  (((enc)->predefinedEntityName)(enc, ptr, end))
+
 #define XmlUpdatePosition(enc, ptr, end, pos) \
   (((enc)->updatePosition)(enc, ptr, end, pos))
 
 #define XmlIsPublicId(enc, ptr, end, badPtr) \
   (((enc)->isPublicId)(enc, ptr, end, badPtr))
 
-#define XmlEncode(enc, ch, buf) \
-  (((enc)->encode)(enc, ch, buf))
+#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
+  (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
 
-#define XmlConvert(enc, targetEnc, fromP, fromLim, toP, toLim) \
-  (((enc)->convert[targetEnc])(enc, fromP, fromLim, toP, toLim))
+#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
+  (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
 
 typedef struct {
   ENCODING initEnc;
@@ -243,7 +257,17 @@ int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity,
 			      int *standalonePtr);
 
 int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
-const ENCODING XMLTOKAPI *XmlGetInternalEncoding(int);
+const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding();
+const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding();
+int XMLTOKAPI XmlUtf8Encode(int charNumber, char *buf);
+int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned short *buf);
+
+int XMLTOKAPI XmlSizeOfUnknownEncoding();
+ENCODING XMLTOKAPI *
+XmlInitUnknownEncoding(void *mem,
+		       int *table,
+		       int (*convert)(void *userData, const char *p),
+		       void *userData);
 
 #ifdef __cplusplus
 }
diff --git a/modules/xml/expat/xmltok/xmltok_impl.c b/modules/xml/expat/xmltok/xmltok_impl.c
index 00475542ea04..513935ae9e02 100644
--- a/modules/xml/expat/xmltok/xmltok_impl.c
+++ b/modules/xml/expat/xmltok/xmltok_impl.c
@@ -56,7 +56,7 @@ Contributor(s):
 
 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
   case BT_NONASCII: \
-    if (!IS_NAME_CHAR(enc, ptr, MINBPC)) { \
+    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
@@ -84,7 +84,7 @@ Contributor(s):
 
 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
   case BT_NONASCII: \
-    if (!IS_NMSTRT_CHAR(enc, ptr, MINBPC)) { \
+    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
@@ -293,15 +293,14 @@ int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
   return XML_TOK_PARTIAL;
 }
 
-/* ptr points to character following "<![" */
 
 static
 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
 			     const char **nextTokPtr)
 {
   int i;
-  /* CDATA[]]> */
-  if (end - ptr < 9 * MINBPC)
+  /* CDATA[ */
+  if (end - ptr < 6 * MINBPC)
     return XML_TOK_PARTIAL;
   for (i = 0; i < 6; i++, ptr += MINBPC) {
     if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) {
@@ -309,22 +308,86 @@ int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *e
       return XML_TOK_INVALID;
     }
   }
-  end -= 2 * MINBPC;
-  while (ptr != end) {
-    switch (BYTE_TYPE(enc, ptr)) {
-    INVALID_CASES(ptr, nextTokPtr)
-    case BT_RSQB:
-      if (CHAR_MATCHES(enc, ptr + MINBPC, ']')
-	  && CHAR_MATCHES(enc, ptr + 2 * MINBPC, '>')) {
-	*nextTokPtr = ptr + 3 * MINBPC;
-	return XML_TOK_CDATA_SECTION;
-      }
-    /* fall through */
-    default:
-      ptr += MINBPC;
+  *nextTokPtr = ptr;
+  return XML_TOK_CDATA_SECT_OPEN;
+}
+
+static  /* Tokenizes inside a CDATA section: returns the close delimiter, a newline, or a run of data characters. */
+int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
+			    const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_NONE;
+#if MINBPC > 1
+  {
+    size_t n = end - ptr;
+    if (n & (MINBPC - 1)) {  /* trim end back to a whole number of MINBPC-byte units */
+      n &= ~(MINBPC - 1);
+      if (n == 0)
+	return XML_TOK_PARTIAL;
+      end = ptr + n;
     }
   }
-  return XML_TOK_PARTIAL;
+#endif
+  switch (BYTE_TYPE(enc, ptr)) {
+  case BT_RSQB:  /* possible start of "]]>" */
+    ptr += MINBPC;
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (!CHAR_MATCHES(enc, ptr, ']'))
+      break;
+    ptr += MINBPC;
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (!CHAR_MATCHES(enc, ptr, '>')) {
+      ptr -= MINBPC;  /* "]]" without ">": the first "]" is data; the loop below stops at the second */
+      break;
+    }
+    *nextTokPtr = ptr + MINBPC;
+    return XML_TOK_CDATA_SECT_CLOSE;
+  case BT_CR:  /* CR, or CR followed by LF, is reported as one newline */
+    ptr += MINBPC;
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (BYTE_TYPE(enc, ptr) == BT_LF)
+      ptr += MINBPC;
+    *nextTokPtr = ptr;
+    return XML_TOK_DATA_NEWLINE;
+  case BT_LF:
+    *nextTokPtr = ptr + MINBPC;
+    return XML_TOK_DATA_NEWLINE;
+  INVALID_CASES(ptr, nextTokPtr)
+  default:
+    ptr += MINBPC;
+    break;
+  }
+  while (ptr != end) {  /* extend the data run until a character that needs its own token */
+    switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: \
+      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
+	*nextTokPtr = ptr; \
+	return XML_TOK_DATA_CHARS; \
+      } \
+      ptr += n; \
+      break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_NONXML:
+    case BT_MALFORM:
+    case BT_TRAIL:
+    case BT_CR:
+    case BT_LF:
+    case BT_RSQB:
+      *nextTokPtr = ptr;  /* end the run here; this character is handled by the next call */
+      return XML_TOK_DATA_CHARS;
+    default:
+      ptr += MINBPC;
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_DATA_CHARS;
 }
 
 /* ptr points to character following "</" */
@@ -442,7 +505,7 @@ int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
   if (ptr == end)
     return XML_TOK_PARTIAL;
   switch (BYTE_TYPE(enc, ptr)) {
-  CHECK_NMSTRT_CASES(end, ptr, end, nextTokPtr)
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   case BT_NUM:
     return PREFIX(scanCharRef)(enc, ptr + MINBPC, end, nextTokPtr);
   default:
@@ -543,6 +606,22 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
 	    break;
 	  }
 	}
+	ptr += MINBPC;
+	if (ptr == end)
+	  return XML_TOK_PARTIAL;
+	switch (BYTE_TYPE(enc, ptr)) {
+	case BT_S:
+	case BT_CR:
+	case BT_LF:
+	  break;
+	case BT_SOL:
+	  goto sol;
+	case BT_GT:
+	  goto gt;
+	default:
+	  *nextTokPtr = ptr;
+	  return XML_TOK_INVALID;
+	}
 	/* ptr points to closing quote */
 	for (;;) {
 	  ptr += MINBPC;
@@ -553,9 +632,11 @@ int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
 	  case BT_S: case BT_CR: case BT_LF:
 	    continue;
 	  case BT_GT:
+          gt:
 	    *nextTokPtr = ptr + MINBPC;
 	    return XML_TOK_START_TAG_WITH_ATTS;
 	  case BT_SOL:
+          sol:
 	    ptr += MINBPC;
 	    if (ptr == end)
 	      return XML_TOK_PARTIAL;
@@ -694,12 +775,12 @@ int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
   case BT_RSQB:
     ptr += MINBPC;
     if (ptr == end)
-      return XML_TOK_PARTIAL;
+      return XML_TOK_TRAILING_RSQB;
     if (!CHAR_MATCHES(enc, ptr, ']'))
       break;
     ptr += MINBPC;
     if (ptr == end)
-      return XML_TOK_PARTIAL;
+      return XML_TOK_TRAILING_RSQB;
     if (!CHAR_MATCHES(enc, ptr, '>')) {
       ptr -= MINBPC;
       break;
@@ -766,7 +847,7 @@ int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
   if (ptr == end)
     return XML_TOK_PARTIAL;
   switch (BYTE_TYPE(enc, ptr)) {
-  CHECK_NMSTRT_CASES(end, ptr, end, nextTokPtr)
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
     *nextTokPtr = ptr;
     return XML_TOK_PERCENT;
@@ -795,7 +876,7 @@ int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
   if (ptr == end)
     return XML_TOK_PARTIAL;
   switch (BYTE_TYPE(enc, ptr)) {
-  CHECK_NMSTRT_CASES(end, ptr, end, nextTokPtr)
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   default:
     *nextTokPtr = ptr;
     return XML_TOK_INVALID;
@@ -944,7 +1025,7 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
   case BT_RPAR:
     ptr += MINBPC;
     if (ptr == end)
-      return XML_TOK_INVALID;
+      return XML_TOK_PARTIAL;
     switch (BYTE_TYPE(enc, ptr)) {
     case BT_AST:
       *nextTokPtr = ptr + MINBPC;
@@ -1001,12 +1082,12 @@ int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
     ptr += MINBPC;
     break;
   case BT_NONASCII:
-    if (IS_NMSTRT_CHAR(enc, ptr, MINBPC)) {
+    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
       ptr += MINBPC;
       tok = XML_TOK_NAME;
       break;
     }
-    if (IS_NAME_CHAR(enc, ptr, MINBPC)) {
+    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
       ptr += MINBPC;
       tok = XML_TOK_NMTOKEN;
       break;
@@ -1343,6 +1424,59 @@ int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
   return checkCharRefNumber(result);
 }
 
+static  /* Returns the character for lt, gt, amp, quot or apos when [ptr, end) spells exactly that name; otherwise 0. */
+int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
+{
+  switch (end - ptr) {  /* dispatch on the name length in bytes */
+  case 2 * MINBPC:  /* "lt" or "gt" */
+    if (CHAR_MATCHES(enc, ptr + MINBPC, 't')) {
+      switch (BYTE_TO_ASCII(enc, ptr)) {
+      case 'l':
+	return '<';
+      case 'g':
+	return '>';
+      }
+    }
+    break;
+  case 3 * MINBPC:  /* "amp" */
+    if (CHAR_MATCHES(enc, ptr, 'a')) {
+      ptr += MINBPC;
+      if (CHAR_MATCHES(enc, ptr, 'm')) {
+	ptr += MINBPC;
+	if (CHAR_MATCHES(enc, ptr, 'p'))
+	  return '&';
+      }
+    }
+    break;
+  case 4 * MINBPC:  /* "quot" or "apos" */
+    switch (BYTE_TO_ASCII(enc, ptr)) {
+    case 'q':
+      ptr += MINBPC;
+      if (CHAR_MATCHES(enc, ptr, 'u')) {
+	ptr += MINBPC;
+	if (CHAR_MATCHES(enc, ptr, 'o')) {
+	  ptr += MINBPC;
+  	  if (CHAR_MATCHES(enc, ptr, 't'))
+	    return '"';
+	}
+      }
+      break;
+    case 'a':
+      ptr += MINBPC;
+      if (CHAR_MATCHES(enc, ptr, 'p')) {
+	ptr += MINBPC;
+	if (CHAR_MATCHES(enc, ptr, 'o')) {
+	  ptr += MINBPC;
+  	  if (CHAR_MATCHES(enc, ptr, 's'))
+	    return '\'';
+	}
+      }
+      break;
+    }
+  }
+  return 0;  /* not one of the five predefined names */
+}
+
 static
 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
 {
diff --git a/modules/xml/macbuild/XML.mcp.exp b/modules/xml/macbuild/XML.mcp.exp
index ce85346a71b1..e29d4a2c5120 100644
--- a/modules/xml/macbuild/XML.mcp.exp
+++ b/modules/xml/macbuild/XML.mcp.exp
@@ -1,7 +1,7 @@
 XML_ErrorString
-XML_GetErrorByteIndex
-XML_GetErrorColumnNumber
-XML_GetErrorLineNumber
+XML_GetCurrentLineNumber
+XML_GetCurrentColumnNumber
+XML_GetCurrentByteIndex
 XML_GetErrorCode
 XML_GetBuffer
 XML_ParseBuffer
@@ -19,7 +19,6 @@ hashTableDestroy
 lookup
 XmlParseXmlDecl
 XmlInitEncoding
-XmlGetInternalEncoding
 XmlPrologStateInit
 tokenizeXMLElement
 XMLDOM_PIHandler