pjs/security/nss/lib/base/utf8.c

/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the Netscape security libraries.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1994-2000
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#ifdef DEBUG
static const char CVS_ID[] = "@(#) $RCSfile: utf8.c,v $ $Revision: 1.6 $ $Date: 2004-04-25 15:03:02 $ $Name:  $";
#endif /* DEBUG */

/*
 * utf8.c
 *
 * This file contains some additional utility routines required for
 * handling UTF8 strings.
 */

#ifndef BASE_H
#include "base.h"
#endif /* BASE_H */

#include "plstr.h"

/*
 * NOTES:
 *
 * There's an "is hex string" function in pki1/atav.c.  If we need
 * it in more places, pull that one out.
 */

/*
 * nssUTF8_CaseIgnoreMatch
 *
 * Returns true if the two UTF8-encoded strings pointed to by the
 * two specified NSSUTF8 pointers differ only in letter case.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *
 * Return value:
 *  PR_TRUE if the strings match, ignoring case
 *  PR_FALSE if they don't
 *  PR_FALSE upon error
 */

NSS_IMPLEMENT PRBool
nssUTF8_CaseIgnoreMatch
(
  const NSSUTF8 *a,
  const NSSUTF8 *b,
  PRStatus *statusOpt
)
{
  /*
   * Compare two strings without regard to case.
   *
   * a, b       the strings to compare
   * statusOpt  optional; receives PR_SUCCESS, or PR_FAILURE when a
   *            debug build rejects a null argument
   *
   * Returns PR_TRUE when the comparison reports equality, PR_FALSE
   * otherwise (including the debug-build error case).
   */
#ifdef NSSDEBUG
  if( (NULL == a) || (NULL == b) ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    if( NULL != statusOpt ) {
      *statusOpt = PR_FAILURE;
    }
    return PR_FALSE;
  }
#endif /* NSSDEBUG */

  if( NULL != statusOpt ) {
    *statusOpt = PR_SUCCESS;
  }

  /*
   * XXX fgmr
   *
   * The comparison is handed to PL_strcasecmp on the raw bytes, which
   * the original author already flagged as wrong for UTF8 data.
   */
  return (0 == PL_strcasecmp((const char *)a, (const char *)b))
    ? PR_TRUE
    : PR_FALSE;
}

/*
 * nssUTF8_PrintableMatch
 *
 * Compares the two Printable strings pointed to by the two
 * specified NSSUTF8 pointers using the rules for Printable String:
 * leading and trailing spaces are disregarded, extents of
 * whitespace match regardless of length, and case is not
 * significant.  If the strings match under those rules, PR_TRUE
 * will be returned; otherwise, PR_FALSE will be returned.  Upon
 * failure, PR_FALSE will be returned.  If the optional statusOpt
 * argument is not NULL, then PR_SUCCESS or PR_FAILURE will be
 * stored in that location.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *
 * Return value:
 *  PR_TRUE if the strings match, ignoring case
 *  PR_FALSE if they don't
 *  PR_FALSE upon error
 */

NSS_IMPLEMENT PRBool
nssUTF8_PrintableMatch
(
  const NSSUTF8 *a,
  const NSSUTF8 *b,
  PRStatus *statusOpt
)
{
  /* Read cursors over the two strings, viewed as unsigned bytes. */
  const PRUint8 *left = (const PRUint8 *)a;
  const PRUint8 *right = (const PRUint8 *)b;

#ifdef NSSDEBUG
  if( (NULL == a) || (NULL == b) ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    if( NULL != statusOpt ) {
      *statusOpt = PR_FAILURE;
    }
    return PR_FALSE;
  }
#endif /* NSSDEBUG */

  if( NULL != statusOpt ) {
    *statusOpt = PR_SUCCESS;
  }

  /* Leading blanks take no part in the comparison. */
  while( ' ' == *left ) {
    left++;
  }
  while( ' ' == *right ) {
    right++;
  }

  while( ('\0' != *left) && ('\0' != *right) ) {
    PRUint8 x = *left++;
    PRUint8 y = *right++;

    /* Fold the ASCII lower-case letters onto their upper-case forms. */
    x = (('a' <= x) && (x <= 'z')) ? (PRUint8)(x - ('a' - 'A')) : x;
    y = (('a' <= y) && (y <= 'z')) ? (PRUint8)(y - ('a' - 'A')) : y;

    if( x != y ) {
      return PR_FALSE;
    }

    /*
     * Collapse a run of blanks to its final blank, so that runs of
     * different lengths compare as a single space on the next pass.
     */
    while( (' ' == left[0]) && (' ' == left[1]) ) {
      left++;
    }
    while( (' ' == right[0]) && (' ' == right[1]) ) {
      right++;
    }
  }

  /* Trailing blanks are ignored as well. */
  while( ' ' == *left ) {
    left++;
  }
  while( ' ' == *right ) {
    right++;
  }

  /* A match requires both cursors to have reached the terminator. */
  return (*left == *right) ? PR_TRUE : PR_FALSE;
}

/*
 * nssUTF8_Duplicate
 *
 * This routine duplicates the UTF8-encoded string pointed to by the
 * specified NSSUTF8 pointer.  If the optional arenaOpt argument is
 * not null, the memory required will be obtained from that arena;
 * otherwise, the memory required will be obtained from the heap.
 * A pointer to the new string will be returned.  In case of error,
 * an error will be placed on the error stack and NULL will be
 * returned.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_INVALID_ARENA
 *  NSS_ERROR_NO_MEMORY
 */

NSS_IMPLEMENT NSSUTF8 *
nssUTF8_Duplicate
(
  const NSSUTF8 *s,
  NSSArena *arenaOpt
)
{
  /*
   * s         the string to copy
   * arenaOpt  optional arena handed to nss_ZAlloc for the new storage
   *
   * Returns the new copy, or NULL if allocation (or a debug-build
   * argument check) fails.
   */
  PRUint32 nbytes;
  NSSUTF8 *copy;

#ifdef NSSDEBUG
  if( NULL == s ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    return (NSSUTF8 *)NULL;
  }

  if( (NULL != arenaOpt) &&
      (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) ) {
    return (NSSUTF8 *)NULL;
  }
#endif /* NSSDEBUG */

  nbytes = PL_strlen((const char *)s);
#ifdef PEDANTIC
  if( '\0' != ((const char *)s)[ nbytes ] ) {
    /* the length did not land on the terminator: it wrapped PRUint32 */
    nss_SetError(NSS_ERROR_NO_MEMORY);
    return (NSSUTF8 *)NULL;
  }
#endif /* PEDANTIC */
  nbytes += 1; /* room for the terminating null */

  copy = nss_ZAlloc(arenaOpt, nbytes);
  if( NULL != copy ) {
    /* the terminator is copied along with the text */
    (void)nsslibc_memcpy(copy, s, nbytes);
  }

  return copy;
}

/*
 * nssUTF8_Size
 *
 * This routine returns the length in bytes (including the terminating
 * null) of the UTF8-encoded string pointed to by the specified
 * NSSUTF8 pointer.  Zero is returned on error.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_VALUE_TOO_LARGE
 *
 * Return value:
 *  0 on error
 *  nonzero length of the string.
 */

NSS_IMPLEMENT PRUint32
nssUTF8_Size
(
  const NSSUTF8 *s,
  PRStatus *statusOpt
)
{
  /*
   * s          the string to measure
   * statusOpt  optional; receives PR_SUCCESS or PR_FAILURE
   *
   * Returns the byte count including the terminating null, or 0 when
   * an error is reported.
   */
  const char *bytes = (const char *)s;
  PRUint32 total;

#ifdef NSSDEBUG
  if( NULL == s ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    if( NULL != statusOpt ) {
      *statusOpt = PR_FAILURE;
    }
    return 0;
  }
#endif /* NSSDEBUG */

  total = PL_strlen(bytes) + 1;
#ifdef PEDANTIC
  if( '\0' != bytes[ total-1 ] ) {
    /* the count did not land on the terminator: it wrapped */
    nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
    if( NULL != statusOpt ) {
      *statusOpt = PR_FAILURE;
    }
    return 0;
  }
#endif /* PEDANTIC */

  if( NULL != statusOpt ) {
    *statusOpt = PR_SUCCESS;
  }

  return total;
}

/*
 * nssUTF8_Length
 *
 * This routine walks the UTF8-encoded string pointed to by the
 * specified NSSUTF8 pointer one encoded sequence at a time and
 * returns the accumulated length of those sequences (not including
 * the terminating null).
 *
 * NOTE(review): this routine has been described as returning a
 * length "in characters", but the total is advanced by the byte
 * width of every sequence, so a multi-byte character contributes
 * more than one.  The two agree for pure-ASCII input.  The return
 * value is deliberately left as it was; confirm what callers expect
 * before changing it.
 *
 * A multi-byte sequence that is cut short by the terminating null
 * is always rejected, so the scan never steps past the end of the
 * string.  When PEDANTIC is defined, every trailing octet must also
 * have the 10xxxxxx continuation form.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_VALUE_TOO_LARGE
 *  NSS_ERROR_INVALID_STRING
 *
 * Return value:
 *  length of the string (which may be zero)
 *  0 on error
 */

NSS_IMPLEMENT PRUint32
nssUTF8_Length
(
  const NSSUTF8 *s,
  PRStatus *statusOpt
)
{
  PRUint32 l = 0;
  const PRUint8 *c = (const PRUint8 *)s;

#ifdef NSSDEBUG
  if( (const NSSUTF8 *)NULL == s ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    goto loser;
  }
#endif /* NSSDEBUG */

  /*
   * From RFC 2044:
   *
   * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
   * 0000 0000-0000 007F   0xxxxxxx
   * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
   * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
   * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
   * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
   * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
   */

  while( 0 != *c ) {
    PRUint32 incr;
    PRUint32 i;

    /* The lead octet determines how many octets the sequence spans. */
    if( (*c & 0x80) == 0 ) {
      incr = 1;
    } else if( (*c & 0xE0) == 0xC0 ) {
      incr = 2;
    } else if( (*c & 0xF0) == 0xE0 ) {
      incr = 3;
    } else if( (*c & 0xF8) == 0xF0 ) {
      incr = 4;
    } else if( (*c & 0xFC) == 0xF8 ) {
      incr = 5;
    } else if( (*c & 0xFE) == 0xFC ) {
      incr = 6;
    } else {
      /* a stray continuation octet, or 0xFE/0xFF */
      nss_SetError(NSS_ERROR_INVALID_STRING);
      goto loser;
    }

    l += incr;

#ifdef PEDANTIC
    if( l < incr ) {
      /* Wrapped-- too big */
      nss_SetError(NSS_ERROR_VALUE_TOO_LARGE);
      goto loser;
    }
#endif /* PEDANTIC */

    for( i = 1; i < incr; i++ ) {
      /*
       * Always refuse a sequence that is cut short by the terminator;
       * otherwise "c += incr" below would step past the end of the
       * string and the loop would keep reading unrelated memory.
       */
      if( 0 == c[i] ) {
        nss_SetError(NSS_ERROR_INVALID_STRING);
        goto loser;
      }

#ifdef PEDANTIC
      /* Trailing octets must look like 10xxxxxx. */
      if( (c[i] & 0xC0) != 0x80 ) {
        nss_SetError(NSS_ERROR_INVALID_STRING);
        goto loser;
      }
#endif /* PEDANTIC */
    }

    c += incr;
  }

  if( (PRStatus *)NULL != statusOpt ) {
    *statusOpt = PR_SUCCESS;
  }

  return l;

 loser:
  if( (PRStatus *)NULL != statusOpt ) {
    *statusOpt = PR_FAILURE;
  }

  return 0;
}


/*
 * nssUTF8_Create
 *
 * This routine creates a UTF8 string from a string in some other
 * format.  Some types of string may include embedded null characters,
 * so for them the length parameter must be used.  For string types
 * that are null-terminated, the length parameter is optional; if it
 * is zero, it will be ignored.  If the optional arena argument is
 * non-null, the memory used for the new string will be obtained from
 * that arena, otherwise it will be obtained from the heap.  This
 * routine may return NULL upon error, in which case it will have
 * placed an error on the error stack.
 *
 * The error may be one of the following:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_NO_MEMORY
 *  NSS_ERROR_UNSUPPORTED_TYPE
 *
 * Return value:
 *  NULL upon error
 *  A non-null pointer to a new UTF8 string otherwise
 */

extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */

NSS_IMPLEMENT NSSUTF8 *
nssUTF8_Create
(
  NSSArena *arenaOpt,
  nssStringType type,
  const void *inputString,
  PRUint32 size /* in bytes, not characters */
)
{
  NSSUTF8 *result = NULL;

#ifdef NSSDEBUG
  if( (NULL != arenaOpt) &&
      (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) ) {
    return (NSSUTF8 *)NULL;
  }

  if( NULL == inputString ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    return (NSSUTF8 *)NULL;
  }
#endif /* NSSDEBUG */

  switch( type ) {
  case nssStringType_PrintableString:
    /*
     * PrintableString consists of A-Za-z0-9 ,()+,-./:=?
     * This is a subset of ASCII, which is a subset of UTF8.
     * So it is copied exactly the way a UTF8String is.
     */
  case nssStringType_UTF8String:
    if( 0 != size ) {
      /*
       * Explicit length: copy that many bytes into a buffer one byte
       * larger.  The extra byte is not written here; presumably
       * nss_ZAlloc returns zeroed memory, which would make it the
       * terminator -- TODO confirm.
       * NOTE(review): size+1 wraps to zero when size is the largest
       * PRUint32 value.
       */
      result = nss_ZAlloc(arenaOpt, size+1);
      if( NULL != result ) {
        (void)nsslibc_memcpy(result, inputString, size);
      }
    } else {
      /* No length given: treat the input as null-terminated. */
      result = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt);
    }
    break;

  case nssStringType_TeletexString:
    /*
     * draft-ietf-pkix-ipki-part1-11 says in part:
     *
     * In addition, many legacy implementations support names encoded
     * in the ISO 8859-1 character set (Latin1String) but tag them as
     * TeletexString.  The Latin1String includes characters used in
     * Western European countries which are not part of the
     * TeletexString character set.  Implementations that process
     * TeletexString SHOULD be prepared to handle the entire ISO
     * 8859-1 character set.[ISO 8859-1].
     */
  case nssStringType_UniversalString:
    /* 4-byte unicode */
  case nssStringType_BMPString:
    /* Base Multilingual Plane of Unicode */
  case nssStringType_PHGString:
    /*
     * PHGString is an IA5String (with case-insensitive comparisons).
     * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has
     * currency symbol.
     */
  case nssStringType_GeneralString:
    /* None of the conversions above is implemented. */
    nss_SetError(NSS_ERROR_INTERNAL_ERROR);
    break;

  case nssStringType_DirectoryString:
    /* This is a composite type requiring BER */
  default:
    nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
    break;
  }

  return result;
}

/*
 * nssUTF8_GetEncoding
 *
 * This routine produces an NSSItem holding the specified UTF8 string
 * encoded as the requested string type.  Only nssStringType_UTF8String
 * is implemented: the string is duplicated and the item's size is set
 * to the value nssUTF8_Size reports for the copy.  If the optional
 * rvOpt argument is non-null, that item is filled in and returned;
 * otherwise a new item is allocated.  The optional arenaOpt argument
 * is passed to the allocation routines.
 *
 * Upon failure nothing allocated by this routine is left behind, and
 * a caller-supplied item that was already touched has its data
 * pointer reset to NULL.
 *
 * The error may be one of the following values:
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_INTERNAL_ERROR (string type not implemented)
 *  NSS_ERROR_UNSUPPORTED_TYPE
 *  or an error set by nssUTF8_Duplicate, nss_ZNEW or nssUTF8_Size
 *
 * Return value:
 *  NULL upon error
 *  A pointer to the filled-in item otherwise
 */

NSS_IMPLEMENT NSSItem *
nssUTF8_GetEncoding
(
  NSSArena *arenaOpt,
  NSSItem *rvOpt,
  nssStringType type,
  NSSUTF8 *string
)
{
  NSSItem *rv = (NSSItem *)NULL;
  PRStatus status = PR_SUCCESS;

#ifdef NSSDEBUG
  if( (NSSArena *)NULL != arenaOpt ) {
    if( PR_SUCCESS != nssArena_verifyPointer(arenaOpt) ) {
      return (NSSItem *)NULL;
    }
  }

  if( (NSSUTF8 *)NULL == string ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    return (NSSItem *)NULL;
  }
#endif /* NSSDEBUG */

  switch( type ) {
  case nssStringType_DirectoryString:
    nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
    break;
  case nssStringType_TeletexString:
    nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
    break;
  case nssStringType_PrintableString:
    nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
    break;
  case nssStringType_UniversalString:
    nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
    break;
  case nssStringType_BMPString:
    nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
    break;
  case nssStringType_UTF8String:
    {
      NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt);
      if( (NSSUTF8 *)NULL == dup ) {
        return (NSSItem *)NULL;
      }

      if( (NSSItem *)NULL == rvOpt ) {
        rv = nss_ZNEW(arenaOpt, NSSItem);
        if( (NSSItem *)NULL == rv ) {
          (void)nss_ZFreeIf(dup);
          return (NSSItem *)NULL;
        }
      } else {
        rv = rvOpt;
      }

      rv->data = dup;
      rv->size = nssUTF8_Size(dup, &status);
      if( (0 == rv->size) && (PR_SUCCESS != status) ) {
        /*
         * The duplicate is owned by this routine until it returns
         * successfully, so it must be released here; otherwise it
         * would leak.
         */
        (void)nss_ZFreeIf(dup);
        if( (NSSItem *)NULL == rvOpt ) {
          (void)nss_ZFreeIf(rv);
        } else {
          /* don't leave the caller's item pointing at freed memory */
          rv->data = NULL;
        }
        return (NSSItem *)NULL;
      }
    }
    break;
  case nssStringType_PHGString:
    nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */
    break;
  default:
    nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE);
    break;
  }

  return rv;
}

/*
 * nssUTF8_CopyIntoFixedBuffer
 *
 * This will copy a UTF8 string into a fixed-length buffer, making
 * sure that no character is left cut in half.  Any remaining space
 * will be padded with the specified ASCII character, typically
 * either null or space.  The buffer is not null-terminated by this
 * routine.  A null string pointer is treated as the empty string.
 *
 * If the string is longer than the buffer, the first bufferSize
 * bytes are copied; when that cut falls inside a multi-byte
 * character, the bytes of the partial character are overwritten
 * with the pad character.
 *
 * The error may be one of the following values (checked in NSSDEBUG
 * builds only):
 *  NSS_ERROR_INVALID_POINTER
 *  NSS_ERROR_INVALID_ARGUMENT
 *
 * Return value:
 *  PR_SUCCESS upon success
 *  PR_FAILURE upon error
 */

NSS_IMPLEMENT PRStatus
nssUTF8_CopyIntoFixedBuffer
(
  NSSUTF8 *string,
  char *buffer,
  PRUint32 bufferSize,
  char pad
)
{
  PRUint32 stringSize = 0;

#ifdef NSSDEBUG
  /*
   * These checks must return PR_FAILURE; this function returns a
   * PRStatus, and PR_FALSE has the same value as PR_SUCCESS.
   */
  if( (char *)NULL == buffer ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    return PR_FAILURE;
  }

  if( 0 == bufferSize ) {
    nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
    return PR_FAILURE;
  }

  /* the pad character must be plain ASCII */
  if( (pad & 0x80) != 0x00 ) {
    nss_SetError(NSS_ERROR_INVALID_ARGUMENT);
    return PR_FAILURE;
  }
#endif /* NSSDEBUG */

  if( (NSSUTF8 *)NULL == string ) {
    string = (NSSUTF8 *) "";
  }

  stringSize = nssUTF8_Size(string, (PRStatus *)NULL);
  stringSize--; /* don't count the trailing null */
  if( stringSize > bufferSize ) {
    PRUint32 bs = bufferSize;
    (void)nsslibc_memcpy(buffer, string, bufferSize);

    /*
     * The copy ended on a character boundary if the last byte is
     * ASCII, or if an N-byte lead octet sits exactly N bytes from
     * the end of the buffer.
     */
    if( (            ((buffer[ bs-1 ] & 0x80) == 0x00)) ||
        ((bs > 1) && ((buffer[ bs-2 ] & 0xE0) == 0xC0)) ||
        ((bs > 2) && ((buffer[ bs-3 ] & 0xF0) == 0xE0)) ||
        ((bs > 3) && ((buffer[ bs-4 ] & 0xF8) == 0xF0)) ||
        ((bs > 4) && ((buffer[ bs-5 ] & 0xFC) == 0xF8)) ||
        ((bs > 5) && ((buffer[ bs-6 ] & 0xFE) == 0xFC)) ) {
      /* It fit exactly */
      return PR_SUCCESS;
    }

    /*
     * Too long.  We have to trim the last character: pad backwards
     * over its continuation octets and stop once the lead octet has
     * been overwritten as well.
     */
    for( /*bs*/; bs != 0; bs-- ) {
      if( (buffer[bs-1] & 0xC0) != 0x80 ) {
        buffer[bs-1] = pad;
        break;
      } else {
        buffer[bs-1] = pad;
      }
    }
  } else {
    /* The whole string fits: pad everything, then lay the text on top. */
    (void)nsslibc_memset(buffer, pad, bufferSize);
    (void)nsslibc_memcpy(buffer, string, stringSize);
  }

  return PR_SUCCESS;
}

/*
 * nssUTF8_Equal
 *
 */

NSS_IMPLEMENT PRBool
nssUTF8_Equal
(
  const NSSUTF8 *a,
  const NSSUTF8 *b,
  PRStatus *statusOpt
)
{
  /*
   * Byte-for-byte equality of two strings, terminators included.
   *
   * a, b       the strings to compare
   * statusOpt  optional; handed on to nssUTF8_Size and
   *            nsslibc_memequal, which report through it
   *
   * Returns PR_FALSE if either size comes back as zero or the sizes
   * differ; otherwise returns whatever nsslibc_memequal reports.
   */
  PRUint32 sizeA;
  PRUint32 sizeB;

#ifdef NSSDEBUG
  if( (NULL == a) || (NULL == b) ) {
    nss_SetError(NSS_ERROR_INVALID_POINTER);
    if( NULL != statusOpt ) {
      *statusOpt = PR_FAILURE;
    }
    return PR_FALSE;
  }
#endif /* NSSDEBUG */

  /* Stop before measuring b if a could not be measured. */
  sizeA = nssUTF8_Size(a, statusOpt);
  if( 0 == sizeA ) {
    return PR_FALSE;
  }

  sizeB = nssUTF8_Size(b, statusOpt);
  if( (0 == sizeB) || (sizeA != sizeB) ) {
    return PR_FALSE;
  }

  return nsslibc_memequal(a, b, sizeA, statusOpt);
}