gecko-dev/security/nss/lib/base/utf8.c

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * utf8.c
 *
 * This file contains some additional utility routines required for
 * handling UTF8 strings.
 */

#ifndef BASE_H
#include "base.h"
#endif /* BASE_H */

#include "plstr.h"

/*
 * NOTES:
 *
 * There's an "is hex string" function in pki1/atav.c.  If we need
 * it in more places, pull that one out.
 */

/*
 * nssUTF8_CaseIgnoreMatch
 *
 * Returns true if the two UTF8-encoded strings pointed to by the
 * two specified NSSUTF8 pointers differ only in typcase.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *
 * Return value:
 *  PR_TRUE if the strings match, ignoring case
 *  PR_FALSE if they don't
 *  PR_FALSE upon error
 */

NSS_IMPLEMENT PRBool
nssUTF8_CaseIgnoreMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt)
{
#ifdef NSSDEBUG
    if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    /*
     * XXX fgmr
     *
     * This is, like, so wrong!
     */
    if (0 == PL_strcasecmp((const char *)a, (const char *)b)) {
        return PR_TRUE;
    } else {
        return PR_FALSE;
    }
}

/*
 * nssUTF8_PrintableMatch
 *
 * Returns true if the two Printable strings pointed to by the
 * two specified NSSUTF8 pointers match when compared with the
 * rules for Printable String (leading and trailing spaces are
 * disregarded, extents of whitespace match irregardless of length,
 * and case is not significant), then PR_TRUE will be returned.
 * Otherwise, PR_FALSE will be returned.  Upon failure, PR_FALSE
 * will be returned.  If the optional statusOpt argument is not
 * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that
 * location.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *
 * Return value:
 *  PR_TRUE if the strings match, ignoring case
 *  PR_FALSE if they don't
 *  PR_FALSE upon error
 */

NSS_IMPLEMENT PRBool
nssUTF8_PrintableMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt)
{
    PRUint8 *c;
    PRUint8 *d;

#ifdef NSSDEBUG
    if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    c = (PRUint8 *)a;
    d = (PRUint8 *)b;

    while (' ' == *c) {
        c++;
    }

    while (' ' == *d) {
        d++;
    }

    while (('\0' != *c) && ('\0' != *d)) {
        PRUint8 e, f;

        e = *c;
        f = *d;

        if (('a' <= e) && (e <= 'z')) {
            e -= ('a' - 'A');
        }

        if (('a' <= f) && (f <= 'z')) {
            f -= ('a' - 'A');
        }

        if (e != f) {
            return PR_FALSE;
        }

        c++;
        d++;

        if (' ' == *c) {
            while (' ' == *c) {
                c++;
            }
            c--;
        }

        if (' ' == *d) {
            while (' ' == *d) {
                d++;
            }
            d--;
        }
    }

    while (' ' == *c) {
        c++;
    }

    while (' ' == *d) {
        d++;
    }

    if (*c == *d) {
        /* And both '\0', btw */
        return PR_TRUE;
    } else {
        return PR_FALSE;
    }
}

/*
 * nssUTF8_Duplicate
 *
 * This routine duplicates the UTF8-encoded string pointed to by the
 * specified NSSUTF8 pointer.  If the optional arenaOpt argument is
 * not null, the memory required will be obtained from that arena;
 * otherwise, the memory required will be obtained from the heap.
 * A pointer to the new string will be returned.  In case of error,
 * an error will be placed on the error stack and NULL will be
 * returned.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_INVALID_ARENA
 *  NSS_ERROR_NO_MEMORY
 */

NSS_IMPLEMENT NSSUTF8 *
nssUTF8_Duplicate(const NSSUTF8 *s, NSSArena *arenaOpt)
{
    NSSUTF8 *rv;
    PRUint32 len;

#ifdef NSSDEBUG
    if ((const NSSUTF8 *)NULL == s) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return (NSSUTF8 *)NULL;
    }

    if ((NSSArena *)NULL != arenaOpt) {
        if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) {
            return (NSSUTF8 *)NULL;
        }
    }
#endif /* NSSDEBUG */

    len = PL_strlen((const char *)s);
#ifdef PEDANTIC
    if ('\0' != ((const char *)s)[len]) {
        /* must have wrapped, e.g., too big for PRUint32 */
        nss_SetError(NSS_ERROR_NO_MEMORY);
        return (NSSUTF8 *)NULL;
    }
#endif     /* PEDANTIC */
    len++; /* zero termination */

    rv = nss_ZAlloc(arenaOpt, len);
    if ((void *)NULL == rv) {
        return (NSSUTF8 *)NULL;
    }

    (void)nsslibc_memcpy(rv, s, len);
    return rv;
}

/*
 * nssUTF8_Size
 *
 * This routine returns the length in bytes (including the terminating
 * null) of the UTF8-encoded string pointed to by the specified
 * NSSUTF8 pointer.  Zero is returned on error.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_VALUE_TOO_LARGE
 *
 * Return value:
 *  0 on error
 *  nonzero length of the string.
 */

NSS_IMPLEMENT PRUint32
nssUTF8_Size(const NSSUTF8 *s, PRStatus *statusOpt)
{
    PRUint32 sv;

#ifdef NSSDEBUG
    if ((const NSSUTF8 *)NULL == s) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return 0;
    }
#endif /* NSSDEBUG */

    sv = PL_strlen((const char *)s) + 1;
#ifdef PEDANTIC
    if ('\0' != ((const char *)s)[sv - 1]) {
        /* wrapped */
        nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return 0;
    }
#endif /* PEDANTIC */

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    return sv;
}

/*
 * nssUTF8_Length
 *
 * This routine returns the length in characters (not including the
 * terminating null) of the UTF8-encoded string pointed to by the
 * specified NSSUTF8 pointer.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_VALUE_TOO_LARGE
 *  NSS_ERROR_INVALID_STRING
 *
 * Return value:
 *  length of the string (which may be zero)
 *  0 on error
 */

NSS_IMPLEMENT PRUint32
nssUTF8_Length(const NSSUTF8 *s, PRStatus *statusOpt)
{
    PRUint32 l = 0;
    const PRUint8 *c = (const PRUint8 *)s;

#ifdef NSSDEBUG
    if ((const NSSUTF8 *)NULL == s) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        goto loser;
    }
#endif /* NSSDEBUG */

    /*
     * From RFC 2044:
     *
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
     * 0000 0000-0000 007F   0xxxxxxx
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
     * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
     */

    while (0 != *c) {
        PRUint32 incr;
        if ((*c & 0x80) == 0) {
            incr = 1;
        } else if ((*c & 0xE0) == 0xC0) {
            incr = 2;
        } else if ((*c & 0xF0) == 0xE0) {
            incr = 3;
        } else if ((*c & 0xF8) == 0xF0) {
            incr = 4;
        } else if ((*c & 0xFC) == 0xF8) {
            incr = 5;
        } else if ((*c & 0xFE) == 0xFC) {
            incr = 6;
        } else {
            nss_SetError(NSS_ERROR_INVALID_STRING);
            goto loser;
        }

        l += incr;

#ifdef PEDANTIC
        if (l < incr) {
            /* Wrapped-- too big */
            nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
            goto loser;
        }

        {
            PRUint8 *d;
            for (d = &c[1]; d < &c[incr]; d++) {
                if ((*d & 0xC0) != 0xF0) {
                    nss_SetError(NSS_ERROR_INVALID_STRING);
                    goto loser;
                }
            }
        }
#endif /* PEDANTIC */

        c += incr;
    }

    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_SUCCESS;
    }

    return l;

loser:
    if ((PRStatus *)NULL != statusOpt) {
        *statusOpt = PR_FAILURE;
    }

    return 0;
}

/*
 * nssUTF8_Create
 *
 * This routine creates a UTF8 string from a string in some other
 * format.  Some types of string may include embedded null characters,
 * so for them the length parameter must be used.  For string types
 * that are null-terminated, the length parameter is optional; if it
 * is zero, it will be ignored.  If the optional arena argument is
 * non-null, the memory used for the new string will be obtained from
 * that arena, otherwise it will be obtained from the heap.  This
 * routine may return NULL upon error, in which case it will have
 * placed an error on the error stack.
 *
 * The error may be one of the following:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_NO_MEMORY
 *  NSS_ERROR_UNSUPPORTED_TYPE
 *
 * Return value:
 *  NULL upon error
 *  A non-null pointer to a new UTF8 string otherwise
 */

extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */

NSS_IMPLEMENT NSSUTF8 *
nssUTF8_Create(NSSArena *arenaOpt, nssStringType type, const void *inputString,
               PRUint32 size /* in bytes, not characters */
               )
{
    NSSUTF8 *rv = NULL;

#ifdef NSSDEBUG
    if ((NSSArena *)NULL != arenaOpt) {
        if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) {
            return (NSSUTF8 *)NULL;
        }
    }

    if ((const void *)NULL == inputString) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return (NSSUTF8 *)NULL;
    }
#endif /* NSSDEBUG */

    switch (type) {
        case nssStringType_DirectoryString:
            /* This is a composite type requiring BER */
            nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
            break;
        case nssStringType_TeletexString:
            /*
             * draft-ietf-pkix-ipki-part1-11 says in part:
             *
             * In addition, many legacy implementations support names encoded
             * in the ISO 8859-1 character set (Latin1String) but tag them as
             * TeletexString.  The Latin1String includes characters used in
             * Western European countries which are not part of the
             * TeletexString charcter set.  Implementations that process
             * TeletexString SHOULD be prepared to handle the entire ISO
             * 8859-1 character set.[ISO 8859-1].
             */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_PrintableString:
            /*
             * PrintableString consists of A-Za-z0-9 ,()+,-./:=?
             * This is a subset of ASCII, which is a subset of UTF8.
             * So we can just duplicate the string over.
             */

            if (0 == size) {
                rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
            } else {
                rv = nss_ZAlloc(arenaOpt, size + 1);
                if ((NSSUTF8 *)NULL == rv) {
                    return (NSSUTF8 *)NULL;
                }

                (void)nsslibc_memcpy(rv, inputString, size);
            }

            break;
        case nssStringType_UniversalString:
            /* 4-byte unicode */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_BMPString:
            /* Base Multilingual Plane of Unicode */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_UTF8String:
            if (0 == size) {
                rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
            } else {
                rv = nss_ZAlloc(arenaOpt, size + 1);
                if ((NSSUTF8 *)NULL == rv) {
                    return (NSSUTF8 *)NULL;
                }

                (void)nsslibc_memcpy(rv, inputString, size);
            }

            break;
        case nssStringType_PHGString:
            /*
             * PHGString is an IA5String (with case-insensitive comparisons).
             * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
             * currency symbol.
             */
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_GeneralString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        default:
            nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
            break;
    }

    return rv;
}

NSS_IMPLEMENT NSSItem *
nssUTF8_GetEncoding(NSSArena *arenaOpt, NSSItem *rvOpt, nssStringType type,
                    NSSUTF8 *string)
{
    NSSItem *rv = (NSSItem *)NULL;
    PRStatus status = PR_SUCCESS;

#ifdef NSSDEBUG
    if ((NSSArena *)NULL != arenaOpt) {
        if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) {
            return (NSSItem *)NULL;
        }
    }

    if ((NSSUTF8 *)NULL == string) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return (NSSItem *)NULL;
    }
#endif /* NSSDEBUG */

    switch (type) {
        case nssStringType_DirectoryString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_TeletexString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_PrintableString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_UniversalString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_BMPString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        case nssStringType_UTF8String: {
            NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt);
            if ((NSSUTF8 *)NULL == dup) {
                return (NSSItem *)NULL;
            }

            if ((NSSItem *)NULL == rvOpt) {
                rv = nss_ZNEW(arenaOpt, NSSItem);
                if ((NSSItem *)NULL == rv) {
                    (void)nss_ZFreeIf(dup);
                    return (NSSItem *)NULL;
                }
            } else {
                rv = rvOpt;
            }

            rv->data = dup;
            dup = (NSSUTF8 *)NULL;
            rv->size = nssUTF8_Size(rv->data, &status);
            if ((0 == rv->size) && (PR_SUCCESS != status)) {
                if ((NSSItem *)NULL == rvOpt) {
                    (void)nss_ZFreeIf(rv);
                }
                return (NSSItem *)NULL;
            }
        } break;
        case nssStringType_PHGString:
            nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
            break;
        default:
            nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
            break;
    }

    return rv;
}

/*
 * nssUTF8_CopyIntoFixedBuffer
 *
 * This will copy a UTF8 string into a fixed-length buffer, making
 * sure that the all characters are valid.  Any remaining space will
 * be padded with the specified ASCII character, typically either
 * null or space.
 *
 * Blah, blah, blah.
 */

NSS_IMPLEMENT PRStatus
nssUTF8_CopyIntoFixedBuffer(NSSUTF8 *string, char *buffer, PRUint32 bufferSize,
                            char pad)
{
    PRUint32 stringSize = 0;

#ifdef NSSDEBUG
    if ((char *)NULL == buffer) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        return PR_FALSE;
    }

    if (0 == bufferSize) {
        nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
        return PR_FALSE;
    }

    if ((pad & 0x80) != 0x00) {
        nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    if ((NSSUTF8 *)NULL == string) {
        string = (NSSUTF8 *)"";
    }

    stringSize = nssUTF8_Size(string, (PRStatus *)NULL);
    stringSize--; /* don't count the trailing null */
    if (stringSize > bufferSize) {
        PRUint32 bs = bufferSize;
        (void)nsslibc_memcpy(buffer, string, bufferSize);

        if ((((buffer[bs - 1] & 0x80) == 0x00)) ||
            ((bs > 1) && ((buffer[bs - 2] & 0xE0) == 0xC0)) ||
            ((bs > 2) && ((buffer[bs - 3] & 0xF0) == 0xE0)) ||
            ((bs > 3) && ((buffer[bs - 4] & 0xF8) == 0xF0)) ||
            ((bs > 4) && ((buffer[bs - 5] & 0xFC) == 0xF8)) ||
            ((bs > 5) && ((buffer[bs - 6] & 0xFE) == 0xFC))) {
            /* It fit exactly */
            return PR_SUCCESS;
        }

        /* Too long.  We have to trim the last character */
        for (/*bs*/; bs != 0; bs--) {
            if ((buffer[bs - 1] & 0xC0) != 0x80) {
                buffer[bs - 1] = pad;
                break;
            } else {
                buffer[bs - 1] = pad;
            }
        }
    } else {
        (void)nsslibc_memset(buffer, pad, bufferSize);
        (void)nsslibc_memcpy(buffer, string, stringSize);
    }

    return PR_SUCCESS;
}

/*
 * nssUTF8_Equal
 *
 */

NSS_IMPLEMENT PRBool
nssUTF8_Equal(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt)
{
    PRUint32 la, lb;

#ifdef NSSDEBUG
    if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) {
        nss_SetError(NSS_ERROR_INVALID_POINTER);
        if ((PRStatus *)NULL != statusOpt) {
            *statusOpt = PR_FAILURE;
        }
        return PR_FALSE;
    }
#endif /* NSSDEBUG */

    la = nssUTF8_Size(a, statusOpt);
    if (0 == la) {
        return PR_FALSE;
    }

    lb = nssUTF8_Size(b, statusOpt);
    if (0 == lb) {
        return PR_FALSE;
    }

    if (la != lb) {
        return PR_FALSE;
    }

    return nsslibc_memequal(a, b, la, statusOpt);
}