2004-02-09 16:37:40 +03:00
|
|
|
/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Peter Van der Beken.
 * Portions created by the Initial Developer are Copyright (C) 2004
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Peter Van der Beken <peter@propagandism.org>
 *
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
|
|
|
|
|
|
|
|
/*
 * Byte-order specific helper macros used by the functions in this file.
 *
 *   PREFIX(ident)             pastes "little2_" or "big2_" in front of ident.
 *   BYTE_TYPE(p)              forwards p to the LITTLE2_/BIG2_ byte-type macro
 *                             together with the encoding returned by
 *                             XmlGetUtf16InternalEncodingNS().
 *   IS_NAME_CHAR_MINBPC(p)    forward p to the matching LITTLE2_/BIG2_ macro,
 *   IS_NMSTRT_CHAR_MINBPC(p)  passing 0 as its first argument.
 */
#if defined(IS_LITTLE_ENDIAN)

#define PREFIX(ident) little2_ ## ident
#define BYTE_TYPE(p) LITTLE2_BYTE_TYPE(XmlGetUtf16InternalEncodingNS(), p)
#define IS_NAME_CHAR_MINBPC(p) LITTLE2_IS_NAME_CHAR_MINBPC(0, p)
#define IS_NMSTRT_CHAR_MINBPC(p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(0, p)

#else /* !IS_LITTLE_ENDIAN */

#define PREFIX(ident) big2_ ## ident
#define BYTE_TYPE(p) BIG2_BYTE_TYPE(XmlGetUtf16InternalEncodingNS(), p)
#define IS_NAME_CHAR_MINBPC(p) BIG2_IS_NAME_CHAR_MINBPC(0, p)
#define IS_NMSTRT_CHAR_MINBPC(p) BIG2_IS_NMSTRT_CHAR_MINBPC(0, p)

#endif /* IS_LITTLE_ENDIAN */
|
|
|
|
|
2004-03-06 19:00:23 +03:00
|
|
|
/* Result codes returned by MOZ_XMLCheckQName(). */

/* Every character in the range was accepted. */
#define MOZ_EXPAT_VALID_QNAME       (0)
/* The range was empty (ptr == end). */
#define MOZ_EXPAT_EMPTY_QNAME       (1 << 0)
/* A character was found that is not allowed at its position in a name. */
#define MOZ_EXPAT_INVALID_CHARACTER (1 << 1)
/* Namespace-aware check failed: misplaced or repeated colon, or a local part
   that starts with a name character that is not a name-start character. */
#define MOZ_EXPAT_MALFORMED         (1 << 2)
|
2004-02-09 16:37:40 +03:00
|
|
|
|
2004-03-06 19:00:23 +03:00
|
|
|
int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware,
|
|
|
|
const char** colon)
|
2004-02-09 16:37:40 +03:00
|
|
|
{
|
2004-03-06 19:00:23 +03:00
|
|
|
int result = MOZ_EXPAT_VALID_QNAME;
|
|
|
|
int nmstrt = 1;
|
2004-02-09 16:37:40 +03:00
|
|
|
*colon = 0;
|
2004-03-06 19:00:23 +03:00
|
|
|
if (ptr == end) {
|
|
|
|
return MOZ_EXPAT_EMPTY_QNAME;
|
2004-02-09 16:37:40 +03:00
|
|
|
}
|
2004-03-06 19:00:23 +03:00
|
|
|
do {
|
2004-02-09 16:37:40 +03:00
|
|
|
switch (BYTE_TYPE(ptr)) {
|
|
|
|
case BT_COLON:
|
2008-02-16 02:22:55 +03:00
|
|
|
/* We're namespace-aware and either first or last character is a colon
|
|
|
|
or we've already seen a colon. */
|
|
|
|
if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) {
|
2008-02-12 07:14:57 +03:00
|
|
|
return MOZ_EXPAT_MALFORMED;
|
2004-03-06 19:00:23 +03:00
|
|
|
}
|
2008-02-12 07:14:57 +03:00
|
|
|
*colon = ptr;
|
|
|
|
nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */
|
2004-03-06 19:00:23 +03:00
|
|
|
break;
|
|
|
|
case BT_NONASCII:
|
2008-02-12 07:14:57 +03:00
|
|
|
if (nmstrt && !IS_NMSTRT_CHAR_MINBPC(ptr)) {
|
|
|
|
/* If this is a valid name character and we're namespace-aware, the
|
|
|
|
QName is malformed. Otherwise, this character's invalid at the
|
|
|
|
start of a name (or, if we're namespace-aware, at the start of a
|
|
|
|
localpart). */
|
|
|
|
return (IS_NAME_CHAR_MINBPC(ptr) && ns_aware) ?
|
|
|
|
MOZ_EXPAT_MALFORMED :
|
|
|
|
MOZ_EXPAT_INVALID_CHARACTER;
|
2004-02-09 16:37:40 +03:00
|
|
|
}
|
2008-02-12 07:14:57 +03:00
|
|
|
if (!IS_NAME_CHAR_MINBPC(ptr)) {
|
|
|
|
return MOZ_EXPAT_INVALID_CHARACTER;
|
2004-03-06 19:00:23 +03:00
|
|
|
}
|
|
|
|
nmstrt = 0;
|
2004-02-09 16:37:40 +03:00
|
|
|
break;
|
2004-03-06 19:00:23 +03:00
|
|
|
case BT_NMSTRT:
|
|
|
|
case BT_HEX:
|
|
|
|
nmstrt = 0;
|
|
|
|
break;
|
|
|
|
case BT_DIGIT:
|
|
|
|
case BT_NAME:
|
|
|
|
case BT_MINUS:
|
|
|
|
if (nmstrt) {
|
2008-02-12 07:14:57 +03:00
|
|
|
return MOZ_EXPAT_INVALID_CHARACTER;
|
2004-03-06 19:00:23 +03:00
|
|
|
}
|
2004-02-09 16:37:40 +03:00
|
|
|
break;
|
|
|
|
default:
|
2008-02-12 07:14:57 +03:00
|
|
|
return MOZ_EXPAT_INVALID_CHARACTER;
|
2004-02-09 16:37:40 +03:00
|
|
|
}
|
2004-03-06 19:00:23 +03:00
|
|
|
ptr += 2;
|
|
|
|
} while (ptr != end);
|
|
|
|
return result;
|
2004-02-09 16:37:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
int MOZ_XMLIsLetter(const char* ptr)
|
|
|
|
{
|
|
|
|
switch (BYTE_TYPE(ptr)) {
|
2004-03-06 19:00:23 +03:00
|
|
|
case BT_NONASCII:
|
|
|
|
if (!IS_NMSTRT_CHAR_MINBPC(ptr)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
case BT_NMSTRT:
|
|
|
|
case BT_HEX:
|
2004-02-09 16:37:40 +03:00
|
|
|
return 1;
|
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int MOZ_XMLIsNCNameChar(const char* ptr)
|
|
|
|
{
|
|
|
|
switch (BYTE_TYPE(ptr)) {
|
2004-03-06 19:00:23 +03:00
|
|
|
case BT_NONASCII:
|
|
|
|
if (!IS_NAME_CHAR_MINBPC(ptr)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
case BT_NMSTRT:
|
|
|
|
case BT_HEX:
|
|
|
|
case BT_DIGIT:
|
|
|
|
case BT_NAME:
|
|
|
|
case BT_MINUS:
|
2004-02-09 16:37:40 +03:00
|
|
|
return 1;
|
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-03-17 17:38:57 +03:00
|
|
|
int MOZ_XMLTranslateEntity(const char* ptr, const char* end, const char** next,
|
|
|
|
XML_Char* result)
|
|
|
|
{
|
|
|
|
const ENCODING* enc = XmlGetUtf16InternalEncodingNS();
|
|
|
|
int tok = PREFIX(scanRef)(enc, ptr, end, next);
|
|
|
|
if (tok <= XML_TOK_INVALID) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tok == XML_TOK_CHAR_REF) {
|
|
|
|
int n = XmlCharRefNumber(enc, ptr);
|
|
|
|
|
|
|
|
/* We could get away with just < 0, but better safe than sorry. */
|
|
|
|
if (n <= 0) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return XmlUtf16Encode(n, (unsigned short*)result);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tok == XML_TOK_ENTITY_REF) {
|
|
|
|
/* *next points to after the semicolon, so the entity ends at
|
|
|
|
*next - enc->minBytesPerChar. */
|
|
|
|
XML_Char ch =
|
|
|
|
(XML_Char)XmlPredefinedEntityName(enc, ptr, *next - enc->minBytesPerChar);
|
|
|
|
if (!ch) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
*result = ch;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove the byte-order helper macros defined at the top of this file so they
   do not leak into whatever follows in the translation unit. */
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_NAME_CHAR_MINBPC
#undef BYTE_TYPE
#undef PREFIX
|