pjs/lib/libi18n/intlcomp.c

379 строки
11 KiB
C

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "NPL"); you may not use this file except in
* compliance with the NPL. You may obtain a copy of the NPL at
* http://www.mozilla.org/NPL/
*
* Software distributed under the NPL is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
* for the specific language governing rights and limitations under the
* NPL.
*
* The Initial Developer of this code under the NPL is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
* Reserved.
*/
/* intlcomp.c */
/*
This file implements
INTL_MatchOneChar
INTL_MatchOneCaseChar
INTL_Strstr
INTL_Strcasestr
*/
#include "intlpriv.h"
#include "pintlcmp.h"
/*
 * CHECK_CSID_AND_ASSERT(csid)
 *
 * Sanity check used by the routines in this file: asserts that the caller
 * passed a concrete charset id, i.e. none of CS_UNKNOWN, CS_DEFAULT or
 * CS_ASCII.
 *
 * The body is wrapped in do { } while (0) so that "CHECK_CSID_AND_ASSERT(x);"
 * is exactly one statement and stays correct inside an unbraced if/else.
 * `csid` appears three times in the expansion, so pass an expression
 * without side effects.
 */
#define CHECK_CSID_AND_ASSERT(csid) \
  do { \
    XP_ASSERT(CS_UNKNOWN != (csid)); /* Please don't pass in CS_UNKNOWN here, you need to know the csid */ \
    XP_ASSERT(CS_DEFAULT != (csid)); /* Please don't pass in CS_DEFAULT here, you need to know the csid */ \
    XP_ASSERT(CS_ASCII != (csid)); /* Please don't pass in CS_ASCII here, you need to know the csid */ \
  } while (0)
/* Private Function Prototype */
/* Lookup table used below for bytes whose high bit (0x80) is clear; defined outside this file. */
extern unsigned char lower_lookup_ascii[];
/*
 * INTL_SingleByteToLower(lower, ch)
 *
 * Maps one byte through the to-lower tables:
 *   - bytes with bit 0x80 set are looked up in `lower`, indexed by their
 *     low seven bits;
 *   - all other bytes are looked up in lower_lookup_ascii.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (for example `c | 1`) is not re-associated by operator precedence.
 * `ch` is still evaluated twice, so it must not have side effects.
 */
#define INTL_SingleByteToLower(lower, ch) \
  (((ch) & 0x80) ? ((lower)[((ch) & 0x7f)]) : (lower_lookup_ascii[(ch)]))
/*
 * Forward declaration: INTL_DoubleByteToLower is called by
 * intl_caseless_normalize before its definition later in this file.
 * Arguments are (to-lower map, output buffer for two bytes, input two bytes).
 * NOTE(review): declared MODULE_PRIVATE here but defined PRIVATE further
 * down -- confirm that both macros yield the same linkage.
 */
MODULE_PRIVATE void INTL_DoubleByteToLower(DoubleByteToLowerMap *, unsigned char* , unsigned char* );
/*
 * intl_strip_CRLF
 *
 * Removes every CR and LF byte from the NUL-terminated string `str`,
 * in place. The surviving bytes keep their relative order and the result
 * is re-terminated, so the string can only stay the same length or shrink.
 */
PRIVATE void intl_strip_CRLF(unsigned char* str)
{
  unsigned char *src = str;   /* next byte to examine */
  unsigned char *dst = str;   /* next slot to write a kept byte */
  unsigned char  c;

  while ((c = *src++) != 0)
  {
    if ((CR == c) || (LF == c))
      continue;               /* drop line-break bytes */
    *dst++ = c;
  }
  *dst = 0;
}
/*
 * intl_caseless_normalize
 *
 * Normalizes `str` IN PLACE so that it can be compared caselessly:
 *   1. every CR and LF byte is removed (intl_strip_CRLF);
 *   2. single-byte characters are mapped through the single-byte to-lower
 *      table for `csid`;
 *   3. two-byte characters are mapped through the double-byte to-lower
 *      table for `csid`;
 *   4. characters of three or more bytes are left unchanged (no
 *      normalization is implemented for them yet; needed for UTF8).
 *
 * csid - charset id; must not be CS_UNKNOWN, CS_DEFAULT or CS_ASCII.
 * str  - NUL-terminated string to rewrite. A NULL pointer asserts and is
 *        then ignored.
 *
 * If a multibyte character is cut short by the terminator (which happens
 * when the wrong csid is used) or INTL_CharLen reports a non-positive
 * length, the function asserts and stops, leaving the rest of the string
 * as it is.
 */
PRIVATE void intl_caseless_normalize(int16 csid, unsigned char* str)
{
  unsigned char *sb_tolowermap = INTL_GetSingleByteToLowerMap(csid);
  unsigned char *p;

  CHECK_CSID_AND_ASSERT(csid);
  XP_ASSERT(NULL != str);
  if (NULL == str)
    return; /* do not dereference NULL in case XP_ASSERT does not stop execution */

  intl_strip_CRLF(str);

  if (SINGLEBYTE == INTL_CharSetType(csid))
  {
    /* for singlebyte csid */
    for (p = str; *p != 0; p++)
      *p = INTL_SingleByteToLower(sb_tolowermap, *p);
    return;
  }
  else
  {
    /* for multibyte csid */
    DoubleByteToLowerMap *db_tolowermap = INTL_GetDoubleByteToLowerMap(csid);
    int l;
    int i;

    for (p = str; *p != 0; p += l)
    {
      l = INTL_CharLen(csid, p); /* *** FIX ME: IMPROVE PERFORMANCE */
      if (l < 1)
      {
        /* A non-positive length would stall the loop or move it backwards. */
        XP_ASSERT(FALSE);
        return;
      }

      /*
       * Check whether we hit a partial character. This happens when the
       * wrong csid is used. Scan FORWARD from the second byte so that the
       * terminator is found before any byte behind it is touched; we must
       * not pass the array boundary even with a wrong csid.
       */
      for (i = 1; i < l; i++)
      {
        if (0 == p[i])
        {
          XP_ASSERT(FALSE);
          return; /* got a partial character, return */
        }
      }

      switch (l)
      {
        case 1:
          *p = INTL_SingleByteToLower(sb_tolowermap, *p);
          break;
        case 2:
          INTL_DoubleByteToLower(db_tolowermap, p, p);
          break;
        default:
          /* We currently do not handle 3 byte normalization. We need to work on this for UTF8 */
          break;
      }
    }
  }
}
/*
 * INTL_DoubleByteToLower
 *
 * Writes the to-lower form of the two-byte character text[0..1] into
 * lowertext[0..1]. `lowertext` may be the same buffer as `text`.
 *
 * db_tolowermap - table of ranges. Each entry maps lead byte src_b1 with a
 *                 trail byte in [src_b2_start, src_b2_end] to lead byte
 *                 dest_b1 and a trail byte shifted to start at
 *                 dest_b2_start. The table ends at the first entry whose
 *                 src_b1 and src_b2_start are both 0, and it has to be in
 *                 sorted order so the search can stop early.
 *
 * When no entry covers the character, the two bytes are copied unchanged.
 */
PRIVATE void INTL_DoubleByteToLower(DoubleByteToLowerMap *db_tolowermap, unsigned char* lowertext, unsigned char* text)
{
  DoubleByteToLowerMap *entry = db_tolowermap;

  while ((entry->src_b1 != 0) || (entry->src_b2_start != 0))
  {
    if (entry->src_b1 == text[0])
    {
      if ((entry->src_b2_start <= text[1]) && (entry->src_b2_end >= text[1]))
      {
        /* Found the covering range: translate and finish. */
        lowertext[0] = entry->dest_b1;
        lowertext[1] = text[1] - entry->src_b2_start + entry->dest_b2_start;
        return;
      }
      if (entry->src_b2_start > text[1])
        break; /* sorted table: later ranges for this lead byte start even higher */
    }
    else if (entry->src_b1 > text[0])
    {
      break;   /* sorted table: already past the lead byte we want */
    }
    entry++;
  }

  /* No mapping: pass the character through as is. */
  lowertext[0] = text[0];
  lowertext[1] = text[1];
}
/*
 * INTL_MatchOneChar
 *
 * Compares the character at the start of text1 with the character at the
 * start of text2, ignoring case where a to-lower table applies:
 *   - one-byte characters are compared after the single-byte to-lower map;
 *   - two-byte characters are compared after the double-byte to-lower map;
 *   - longer characters are compared byte for byte (case cannot be ignored
 *     for them).
 *
 * csid    - charset id of both strings.
 * text1   - first string; its leading character decides *charlen.
 * text2   - second string.
 * charlen - out: byte length of the character at text1 (1 for single-byte
 *           charsets). It is set on every return path, including the one
 *           where the two characters have different lengths, matching
 *           what INTL_MatchOneCaseChar does.
 *
 * Returns a true value when the two characters match, FALSE otherwise.
 */
PUBLIC XP_Bool INTL_MatchOneChar(int16 csid, unsigned char *text1,unsigned char *text2,int *charlen)
{
  int l1 = 1;
  int i;

  if (INTL_CharSetType(csid) != SINGLEBYTE)
  {
    int l2;
    l1 = INTL_CharLen(csid ,text1); /* *** FIX ME: IMPROVE PERFORMANCE */
    l2 = INTL_CharLen(csid ,text2); /* *** FIX ME: IMPROVE PERFORMANCE */
    if (l1 != l2)
    {
      *charlen = l1; /* never leave the out-parameter unset */
      return FALSE;
    }
  }
  *charlen = l1;

  if (1 == l1)
  {
    unsigned char *sb_tolowermap = INTL_GetSingleByteToLowerMap(csid);
    return( INTL_SingleByteToLower(sb_tolowermap,text1[0]) == INTL_SingleByteToLower(sb_tolowermap, text2[0]));
  }

  if (2 == l1)
  {
    DoubleByteToLowerMap *db_tolowermap = INTL_GetDoubleByteToLowerMap(csid);
    unsigned char lowertext1[2], lowertext2[2];
    INTL_DoubleByteToLower(db_tolowermap, lowertext1, text1);
    INTL_DoubleByteToLower(db_tolowermap, lowertext2, text2);
    return( ( lowertext1[0] == lowertext2[0] ) &&
            ( lowertext1[1] == lowertext2[1] ) );
  }

  /* for character which is neither one byte nor two byte, we cannot ignore case for them */
  for (i = 0; i < l1; i++)
  {
    if (text1[i] != text2[i])
      return FALSE;
  }
  return TRUE;
}
/*
 * INTL_MatchOneCaseChar
 *
 * Case-sensitive comparison of the character at the start of text1 with
 * the bytes at the start of text2.
 *
 * csid    - charset id of both strings.
 * charlen - out: 1 for single-byte charsets, otherwise the byte length of
 *           the character at text1 as reported by INTL_CharLen. It is set
 *           whether or not the characters match.
 *
 * Returns a true value when all bytes of the character are equal,
 * FALSE otherwise.
 */
PUBLIC XP_Bool INTL_MatchOneCaseChar(int16 csid, unsigned char *text1,unsigned char *text2,int *charlen)
{
  int nbytes;
  int k;

  if (INTL_CharSetType(csid) == SINGLEBYTE)
  {
    *charlen = 1;
    return( text1[0]== text2[0]);
  }

  nbytes = INTL_CharLen(csid, (unsigned char *) text1); /* *** FIX ME: IMPROVE PERFORMANCE */
  *charlen = nbytes;

  k = 0;
  while (k < nbytes)
  {
    if (text1[k] != text2[k])
      return FALSE;
    k++;
  }
  return TRUE;
}
/*
 * INTL_Strstr
 *
 * Charset-aware, case-sensitive substring search: looks for s2 inside s1,
 * trying a match only at character boundaries of s1 (stepping with
 * INTL_NextChar) and comparing character by character with
 * INTL_MatchOneCaseChar.
 *
 * Returns s1 itself when s2 is NULL or empty, NULL when s1 is NULL or
 * empty (and s2 is not), otherwise a pointer to the first match in s1 or
 * NULL when there is none.
 */
PUBLIC
char *INTL_Strstr(int16 csid, const char *s1,const char *s2)
{
  int step;       /* byte length of the character just matched */
  char *start;    /* candidate match position in s1 */
  char *hay;      /* cursor in s1 while verifying a candidate */
  char *needle;   /* cursor in s2 while verifying a candidate */

  if ((NULL == s2) || ('\0' == *s2))
    return (char *)s1;
  if ((NULL == s1) || ('\0' == *s1))
    return NULL;

  start = (char *)s1;
  while (*start)
  {
    hay = start;
    needle = (char *)s2;
    while ((*hay) && (*needle) &&
           INTL_MatchOneCaseChar(csid, (unsigned char*)hay, (unsigned char*)needle, &step))
    {
      hay += step;      /* *** FIX ME: IMPROVE PERFORMANCE */
      needle += step;
    }
    if ('\0' == *needle)
      return start;     /* consumed all of s2: match */
    start = INTL_NextChar(csid ,start); /* *** FIX ME: IMPROVE PERFORMANCE */
  }
  return NULL;
}
/*
 * INTL_Strcasestr
 *
 * Charset-aware substring search that ignores case where
 * INTL_MatchOneChar can: looks for s2 inside s1, trying a match only at
 * character boundaries of s1 (stepping with INTL_NextChar).
 *
 * Returns s1 itself when s2 is NULL or empty, NULL when s1 is NULL or
 * empty (and s2 is not), otherwise a pointer to the first match in s1 or
 * NULL when there is none.
 *
 * To Do:
 * We should take advantage of INTL_GetNormalizeStr to improve the
 * performance of this
 */
PUBLIC
char *INTL_Strcasestr(int16 csid, const char *s1, const char *s2)
{
  int step;       /* byte length of the character just matched */
  char *start;    /* candidate match position in s1 */
  char *hay;      /* cursor in s1 while verifying a candidate */
  char *needle;   /* cursor in s2 while verifying a candidate */

  if ((NULL == s2) || ('\0' == *s2))
    return (char *)s1;
  if ((NULL == s1) || ('\0' == *s1))
    return NULL;

  start = (char *)s1;
  while (*start)
  {
    hay = start;
    needle = (char *)s2;
    while ((*hay) && (*needle) &&
           INTL_MatchOneChar(csid, (unsigned char*)hay, (unsigned char*)needle, &step))
    {
      hay += step;      /* *** FIX ME: IMPROVE PERFORMANCE */
      needle += step;
    }
    if ('\0' == *needle)
      return start;     /* consumed all of s2: match */
    start = INTL_NextChar(csid , start); /* *** FIX ME: IMPROVE PERFORMANCE */
  }
  return NULL;
}
/*
 * INTL_GetNormalizeStr
 *
 * Makes a copy of `str` with StrAllocCopy and normalizes the copy with
 * intl_caseless_normalize, leaving `str` itself untouched.
 *
 * csid - charset id of str; must not be CS_UNKNOWN, CS_DEFAULT or CS_ASCII.
 *
 * Returns the normalized copy, or NULL when the copy could not be made
 * (out of memory).
 * NOTE(review): the caller presumably owns the returned string and must
 * release it -- confirm the matching free routine for StrAllocCopy.
 */
PUBLIC unsigned char* INTL_GetNormalizeStr(int16 csid, unsigned char* str)
{
  char* n_str = NULL;

  StrAllocCopy(n_str, (char*) str);
  XP_ASSERT(n_str); /* Should only come here if Memory Not Enough */
  CHECK_CSID_AND_ASSERT(csid);

  if (NULL == n_str)
    return NULL;

  intl_caseless_normalize(csid, (unsigned char*)n_str);
  return (unsigned char*)n_str;
}
#ifdef MOZ_MAIL_NEWS
/*
 * INTL_GetNormalizeStrFromRFC1522
 *
 * Produces a normalized string from an RFC 1522 header value: the header
 * is first passed to INTL_DecodeMimePartIIStr; when that returns NULL
 * (meaning no conversion) a plain copy of the header is made with
 * StrAllocCopy instead. The resulting string is then normalized with
 * intl_caseless_normalize.
 *
 * csid - charset id; must not be CS_UNKNOWN, CS_DEFAULT or CS_ASCII.
 *
 * Returns the normalized string, or NULL when no string could be obtained
 * (out of memory).
 * NOTE(review): the caller presumably owns the returned string -- confirm
 * how it has to be released.
 */
PUBLIC unsigned char* INTL_GetNormalizeStrFromRFC1522(int16 csid, unsigned char* rfc1522header)
{
  char* n_header;

  n_header = (char*) INTL_DecodeMimePartIIStr((char*)rfc1522header, csid, FALSE);
  if (NULL == n_header) /* INTL_DecodeMimePartIIStr() may return NULL- Mean no conversion */
  {
    StrAllocCopy(n_header, (char*) rfc1522header);
  }
  XP_ASSERT(n_header); /* Should only come here if Memory Not Enough */
  CHECK_CSID_AND_ASSERT(csid);

  if (NULL == n_header)
    return NULL;

  intl_caseless_normalize(csid, (unsigned char*)n_header);
  return (unsigned char*)n_header;
}
#endif /* MOZ_MAIL_NEWS */
/*
 * INTL_StrContains
 *
 * Reports whether normalizedSubstr occurs in normalizedStr, starting at a
 * character boundary of normalizedStr (boundaries are found with
 * INTL_CharLen for strcsid). The comparison itself is a plain byte
 * comparison.
 *
 * It is the caller's responsibility to make sure normalizedStr and
 * normalizedSubstr are normalized by calling
 *   INTL_GetNormalizeStr() or
 *   INTL_GetNormalizeStrFromRFC1522()
 *
 * Returns TRUE when found, FALSE otherwise. An empty normalizedStr never
 * contains anything, not even an empty normalizedSubstr.
 */
PUBLIC XP_Bool INTL_StrContains(
int16 strcsid, unsigned char* normalizedStr, unsigned char* normalizedSubstr)
{
  char* p;
  int l_char;
  int l_idx;
  int l_substr = XP_STRLEN((char*) normalizedSubstr);
  int l_str = XP_STRLEN((char*) normalizedStr);
  int l_last = l_str - l_substr; /* last offset at which the substring still fits */

  CHECK_CSID_AND_ASSERT(strcsid);

  /*
   * The loop is bounded by the byte index only. A truncated multibyte
   * character at the end of the string can push the index past l_str;
   * checking the index (instead of dereferencing the advanced pointer)
   * keeps us from reading behind the terminator. Offsets beyond l_last
   * cannot match, so they are not tried.
   */
  for (l_idx = 0; (l_idx < l_str) && (l_idx <= l_last); l_idx += l_char)
  {
    p = (char*)normalizedStr + l_idx;
    l_char = INTL_CharLen(strcsid, (unsigned char*)p); /* *** FIX ME: Should do better tune for performance here */
    if (0 == XP_STRNCMP(p, (char*) normalizedSubstr, l_substr))
      return TRUE;
  }
  return FALSE;
}
/*
 * INTL_StrIs
 *
 * Reports whether normalizedStr and normalizedSubstr are byte-for-byte
 * equal.
 *
 * It is the caller's responsibility to make sure both strings are
 * normalized by calling
 *   INTL_GetNormalizeStr() or
 *   INTL_GetNormalizeStrFromRFC1522()
 *
 * strcsid is only checked by CHECK_CSID_AND_ASSERT; it does not influence
 * the comparison.
 */
PUBLIC XP_Bool INTL_StrIs(
int16 strcsid, unsigned char* normalizedStr, unsigned char* normalizedSubstr)
{
  int diff;

  CHECK_CSID_AND_ASSERT(strcsid);
  diff = XP_STRCMP((char*) normalizedStr, (char*) normalizedSubstr);
  return (0 == diff);
}
/*
 * INTL_StrBeginWith
 *
 * Reports whether normalizedStr starts with the bytes of normalizedSubstr.
 *
 * It is the caller's responsibility to make sure both strings are
 * normalized by calling
 *   INTL_GetNormalizeStr() or
 *   INTL_GetNormalizeStrFromRFC1522()
 *
 * strcsid is only checked by CHECK_CSID_AND_ASSERT; it does not influence
 * the comparison.
 */
PUBLIC XP_Bool INTL_StrBeginWith(
int16 strcsid, unsigned char* normalizedStr, unsigned char* normalizedSubstr)
{
  char *prefix = (char*) normalizedSubstr;

  CHECK_CSID_AND_ASSERT(strcsid);
  /* Compare only as many bytes as the prefix has. */
  return (0 == XP_STRNCMP((char*) normalizedStr, prefix, XP_STRLEN(prefix)));
}
/*
 * INTL_StrEndWith
 *
 * Reports whether normalizedStr ends with normalizedSubstr, where the
 * suffix has to start at a character boundary of normalizedStr
 * (boundaries are found with INTL_CharLen for strcsid).
 *
 * It is the caller's responsibility to make sure normalizedStr and
 * normalizedSubstr are normalized by calling
 *   INTL_GetNormalizeStr() or
 *   INTL_GetNormalizeStrFromRFC1522()
 *
 * Returns a true value when normalizedStr ends with normalizedSubstr,
 * FALSE otherwise (including when normalizedSubstr is the longer string).
 */
PUBLIC XP_Bool INTL_StrEndWith(
int16 strcsid, unsigned char* normalizedStr, unsigned char* normalizedSubstr)
{
  int l_char;
  int l_idx;
  int l_substr = XP_STRLEN((char*) normalizedSubstr);
  int l_str = XP_STRLEN((char*) normalizedStr);
  int l_stop = l_str - l_substr; /* byte offset where the suffix would have to begin */

  CHECK_CSID_AND_ASSERT(strcsid);

  /*
   * Walk character by character up to l_stop. The walk is bounded by the
   * byte index only: while l_idx < l_stop <= l_str the current byte is
   * inside the string, so no pointer is ever formed or read behind the
   * terminator, even when a trailing multibyte character is truncated.
   */
  for (l_idx = 0; l_idx < l_stop; l_idx += l_char)
  {
    l_char = INTL_CharLen(strcsid, normalizedStr + l_idx); /* *** FIX ME: Should do better tune for performance here */
  }

  /* l_stop is negative (substring too long) or falls inside a character. */
  if (l_idx != l_stop)
    return FALSE;

  return (0 == XP_STRCMP((char*)(normalizedStr + l_stop), (char*) normalizedSubstr));
}