1998-05-28 08:23:42 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
|
|
|
*
|
1999-11-02 04:46:24 +03:00
|
|
|
* The contents of this file are subject to the Netscape Public
|
|
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
|
|
* except in compliance with the License. You may obtain a copy of
|
|
|
|
* the License at http://www.mozilla.org/NPL/
|
1998-05-28 08:23:42 +04:00
|
|
|
*
|
1999-11-02 04:46:24 +03:00
|
|
|
* Software distributed under the License is distributed on an "AS
|
|
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
|
|
* implied. See the License for the specific language governing
|
|
|
|
* rights and limitations under the License.
|
1998-05-28 08:23:42 +04:00
|
|
|
*
|
1999-11-02 04:46:24 +03:00
|
|
|
* The Original Code is mozilla.org code.
|
|
|
|
*
|
|
|
|
* The Initial Developer of the Original Code is Netscape
|
1998-05-28 08:23:42 +04:00
|
|
|
* Communications Corporation. Portions created by Netscape are
|
1999-11-02 04:46:24 +03:00
|
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
|
|
* Rights Reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s):
|
1998-05-28 08:23:42 +04:00
|
|
|
*/
|
|
|
|
/* utf8.c - misc. UTF-8 "string" functions. */
|
|
|
|
#include "ldap.h"
|
1998-11-23 01:03:18 +03:00
|
|
|
#include "ldap-int.h"
|
1998-05-28 08:23:42 +04:00
|
|
|
|
|
|
|
/* Sequence-length lookup, indexed by the top six bits of a byte
 * ((byte >> 2) & 0x3F):
 *   0x00-0x7F -> 1   single-byte (ASCII) character
 *   0x80-0xBF -> 0   continuation byte, i.e. not the start of a character
 *   0xC0-0xDF -> 2
 *   0xE0-0xEF -> 3
 *   0xF0-0xF7 -> 4
 *   0xF8-0xFB -> 5
 *   0xFC-0xFF -> 6
 * The table is only ever read, so it is declared const.
 */
static const char UTF8len[64]
= {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6};
|
|
|
|
|
|
|
|
int
LDAP_CALL
ldap_utf8len (const char* s)
/* Report how many char's make up the character that starts at *s,
   measured as the distance ldap_utf8next() advances from s.
*/
{
    const char* after = ldap_utf8next((char*)s);

    return after - s;
}
|
|
|
|
|
|
|
|
char*
|
|
|
|
LDAP_CALL
|
|
|
|
ldap_utf8next (char* s)
|
|
|
|
/* Return a pointer to the character immediately following *s.
|
|
|
|
Handle any valid UTF-8 character, including '\0' and ASCII.
|
|
|
|
Try to handle a misaligned pointer or a malformed character.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
register unsigned char* next = (unsigned char*)s;
|
|
|
|
switch (UTF8len [(*next >> 2) & 0x3F]) {
|
|
|
|
case 0: /* erroneous: s points to the middle of a character. */
|
|
|
|
case 6: if ((*++next & 0xC0) != 0x80) break;
|
|
|
|
case 5: if ((*++next & 0xC0) != 0x80) break;
|
|
|
|
case 4: if ((*++next & 0xC0) != 0x80) break;
|
|
|
|
case 3: if ((*++next & 0xC0) != 0x80) break;
|
|
|
|
case 2: if ((*++next & 0xC0) != 0x80) break;
|
|
|
|
case 1: ++next;
|
|
|
|
}
|
|
|
|
return (char*) next;
|
|
|
|
}
|
|
|
|
|
|
|
|
char*
LDAP_CALL
ldap_utf8prev (char* s)
/* Return a pointer to the character immediately preceding *s.
   Handle any valid UTF-8 character, including '\0' and ASCII.
   Try to handle a misaligned pointer or a malformed character:
   the scan steps back over continuation bytes (10xxxxxx) but never
   more than six bytes in total.

   The byte at s[-1] is always read, so the caller must make sure at
   least one byte precedes s.

   The step limit is kept as a counter rather than as a "s - 6" pointer,
   because merely forming a pointer that lies before the start of the
   array is undefined behaviour in C.
*/
{
    unsigned char* prev = (unsigned char*)s;
    int steps_left = 6;     /* longest sequence handled by this file */

    do {
        --prev;
    } while (((*prev & 0xC0) == 0x80) && (--steps_left > 0));

    return (char*) prev;
}
|
|
|
|
|
|
|
|
int
|
|
|
|
LDAP_CALL
|
|
|
|
ldap_utf8copy (char* dst, const char* src)
|
|
|
|
/* Copy a character from src to dst; return the number of char's copied.
|
|
|
|
Handle any valid UTF-8 character, including '\0' and ASCII.
|
|
|
|
Try to handle a misaligned pointer or a malformed character.
|
|
|
|
*/
|
|
|
|
{
|
|
|
|
register const unsigned char* s = (const unsigned char*)src;
|
|
|
|
switch (UTF8len [(*s >> 2) & 0x3F]) {
|
|
|
|
case 0: /* erroneous: s points to the middle of a character. */
|
|
|
|
case 6: *dst++ = *s++; if ((*s & 0xC0) != 0x80) break;
|
|
|
|
case 5: *dst++ = *s++; if ((*s & 0xC0) != 0x80) break;
|
|
|
|
case 4: *dst++ = *s++; if ((*s & 0xC0) != 0x80) break;
|
|
|
|
case 3: *dst++ = *s++; if ((*s & 0xC0) != 0x80) break;
|
|
|
|
case 2: *dst++ = *s++; if ((*s & 0xC0) != 0x80) break;
|
|
|
|
case 1: *dst = *s++;
|
|
|
|
}
|
|
|
|
return s - (const unsigned char*)src;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t
LDAP_CALL
ldap_utf8characters (const char* src)
/* Count the characters in the 0-terminated array src, advancing one
   character at a time with LDAP_UTF8INC.  The terminator is not counted.
*/
{
    char* cursor = (char*)src;
    size_t count = 0;

    while (*cursor) {
        ++count;
        LDAP_UTF8INC(cursor);
    }
    return count;
}
|
|
|
|
|
|
|
|
unsigned long LDAP_CALL
|
|
|
|
ldap_utf8getcc( const char** src )
|
|
|
|
{
|
|
|
|
register unsigned long c;
|
|
|
|
register const unsigned char* s = (const unsigned char*)*src;
|
|
|
|
switch (UTF8len [(*s >> 2) & 0x3F]) {
|
|
|
|
case 0: /* erroneous: s points to the middle of a character. */
|
|
|
|
c = (*s++) & 0x3F; goto more5;
|
|
|
|
case 1: c = (*s++); break;
|
|
|
|
case 2: c = (*s++) & 0x1F; goto more1;
|
|
|
|
case 3: c = (*s++) & 0x0F; goto more2;
|
|
|
|
case 4: c = (*s++) & 0x07; goto more3;
|
|
|
|
case 5: c = (*s++) & 0x03; goto more4;
|
|
|
|
case 6: c = (*s++) & 0x01; goto more5;
|
|
|
|
more5: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
|
|
|
|
more4: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
|
|
|
|
more3: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
|
|
|
|
more2: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
|
|
|
|
more1: if ((*s & 0xC0) != 0x80) break; c = (c << 6) | ((*s++) & 0x3F);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
*src = (const char*)s;
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
char*
|
|
|
|
LDAP_CALL
|
|
|
|
ldap_utf8strtok_r( char* sp, const char* brk, char** next)
|
|
|
|
{
|
|
|
|
const char *bp;
|
|
|
|
unsigned long sc, bc;
|
|
|
|
char *tok;
|
|
|
|
|
|
|
|
if (sp == NULL && (sp = *next) == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* Skip leading delimiters; roughly, sp += strspn(sp, brk) */
|
|
|
|
cont:
|
|
|
|
sc = LDAP_UTF8GETC(sp);
|
|
|
|
for (bp = brk; (bc = LDAP_UTF8GETCC(bp)) != 0;) {
|
|
|
|
if (sc == bc)
|
|
|
|
goto cont;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sc == 0) { /* no non-delimiter characters */
|
|
|
|
*next = NULL;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
tok = LDAP_UTF8PREV(sp);
|
|
|
|
|
|
|
|
/* Scan token; roughly, sp += strcspn(sp, brk)
|
|
|
|
* Note that brk must be 0-terminated; we stop if we see that, too.
|
|
|
|
*/
|
|
|
|
while (1) {
|
|
|
|
sc = LDAP_UTF8GETC(sp);
|
|
|
|
bp = brk;
|
|
|
|
do {
|
|
|
|
if ((bc = LDAP_UTF8GETCC(bp)) == sc) {
|
|
|
|
if (sc == 0) {
|
|
|
|
*next = NULL;
|
|
|
|
} else {
|
|
|
|
*next = sp;
|
|
|
|
*(LDAP_UTF8PREV(sp)) = 0;
|
|
|
|
}
|
|
|
|
return tok;
|
|
|
|
}
|
|
|
|
} while (bc != 0);
|
|
|
|
}
|
|
|
|
/* NOTREACHED */
|
|
|
|
}
|
|
|
|
|
|
|
|
int
LDAP_CALL
ldap_utf8isalnum( char* s )
/* Return 1 if the byte at *s is an ASCII letter or decimal digit,
   0 otherwise.  Any byte with the high bit set yields 0.
*/
{
    const unsigned char first = *(unsigned char*)s;

    if (first >= 0x80) {
        return 0;
    }
    return (first >= '0' && first <= '9')
        || (first >= 'A' && first <= 'Z')
        || (first >= 'a' && first <= 'z');
}
|
|
|
|
|
|
|
|
int
LDAP_CALL
ldap_utf8isalpha( char* s )
/* Return 1 if the byte at *s is an ASCII letter (A-Z or a-z),
   0 otherwise.  Any byte with the high bit set yields 0.
*/
{
    const unsigned char first = *(unsigned char*)s;

    if (first >= 0x80) {
        return 0;
    }
    return (first >= 'A' && first <= 'Z')
        || (first >= 'a' && first <= 'z');
}
|
|
|
|
|
|
|
|
int
LDAP_CALL
ldap_utf8isdigit( char* s )
/* Return 1 if the byte at *s is an ASCII decimal digit (0-9),
   0 otherwise.  Any byte with the high bit set yields 0.
*/
{
    const unsigned char first = *(unsigned char*)s;

    if (first >= 0x80) {
        return 0;
    }
    return (first >= '0' && first <= '9') ? 1 : 0;
}
|
|
|
|
|
|
|
|
int
LDAP_CALL
ldap_utf8isxdigit( char* s )
/* Return 1 if the byte at *s is an ASCII hexadecimal digit
   (0-9, A-F or a-f), 0 otherwise.  Any byte with the high bit set
   yields 0.
*/
{
    const unsigned char first = *(unsigned char*)s;

    if (first >= 0x80) {
        return 0;
    }
    return (first >= '0' && first <= '9')
        || (first >= 'A' && first <= 'F')
        || (first >= 'a' && first <= 'f');
}
|
|
|
|
|
|
|
|
int
LDAP_CALL
ldap_utf8isspace( char* s )
/* Return 1 if the byte at *s is one of the ASCII white-space characters
   space, tab, newline, carriage return, vertical tab or form feed;
   0 otherwise.  Any byte with the high bit set yields 0.
*/
{
    const unsigned char first = *(unsigned char*)s;

    if (first >= 0x80) {
        return 0;
    }
    if (first == ' '  || first == '\t' || first == '\n' ||
        first == '\r' || first == '\v' || first == '\f') {
        return 1;
    }
    return 0;
}
|