/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.0 (the "NPL"); you may not use this file except in
 * compliance with the NPL. You may obtain a copy of the NPL at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the NPL is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
 * for the specific language governing rights and limitations under the
 * NPL.
 *
 * The Initial Developer of this code under the NPL is Netscape
 * Communications Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All Rights
 * Reserved.
 */
/* intlcomp.c */
/*
    This file implements
        INTL_MatchOneChar
        INTL_MatchOneCaseChar
        INTL_Strstr
        INTL_Strcasestr
*/
#include "intlpriv.h"
#include "pintlcmp.h"
#if defined(XP_MAC)
/* NOTE(review): the header name of the following #include is missing from
   this copy of the file; restore it from the original source. */
#include
#endif

/*
    Debug-time sanity check: callers must pass a concrete character set id,
    never CS_UNKNOWN, CS_DEFAULT or CS_ASCII.
*/
#define CHECK_CSID_AND_ASSERT(csid) \
{ \
    XP_ASSERT(CS_UNKNOWN != (csid)); /* Please don't pass in CS_UNKNOWN here, you need to know the csid */ \
    XP_ASSERT(CS_DEFAULT != (csid)); /* Please don't pass in CS_DEFAULT here, you need to know the csid */ \
    XP_ASSERT(CS_ASCII != (csid)); /* Please don't pass in CS_ASCII here, you need to know the csid */ \
}

/* Private Function Prototype */
extern unsigned char lower_lookup_ascii[];

/*
    Lower-case one byte.  Bytes with the high bit set are looked up in the
    per-charset table `lower` (indexed by the low 7 bits); all other bytes are
    looked up in lower_lookup_ascii.
    Every use of a macro argument is parenthesized so that expression
    arguments expand correctly.  `ch` is still evaluated more than once, so do
    not pass an expression with side effects.
*/
#define INTL_SingleByteToLower(lower, ch) \
    (((ch) & 0x80) ? ((lower)[((ch) & 0x7f)]) : (lower_lookup_ascii[(ch)]))

MODULE_PRIVATE void INTL_DoubleByteToLower(DoubleByteToLowerMap *, unsigned char* , unsigned char* );

/*
    Function intl_strip_CRLF
    Remove every CR and LF byte from the NUL-terminated string `str`.
    The string is compacted in place and re-terminated, so it never grows.
*/
PRIVATE void intl_strip_CRLF(unsigned char* str)
{
    unsigned char* in;
    unsigned char* out;

    /* `in` scans every byte; `out` trails behind and receives the bytes kept. */
    for(in = out = str; 0 != *in; in++)
    {
        if((CR != *in) && (LF != *in))
        {
            *out++ = *in;
        }
    }
    *out = 0;
}

/* Function intl_caseless_normalize
   This function has the side effect of modifying the string it is given.
It will normalize the string in a caseless matter */ PRIVATE void intl_caseless_normalize(int16 csid, unsigned char* str) { unsigned char *sb_tolowermap = INTL_GetSingleByteToLowerMap(csid); unsigned char *p; CHECK_CSID_AND_ASSERT(csid); XP_ASSERT(NULL != str); intl_strip_CRLF(str); if(SINGLEBYTE == INTL_CharSetType(csid)) { /* for singlebyte csid */ for(p = str; *p != 0 ; p++) *p = INTL_SingleByteToLower(sb_tolowermap, *p); return; } else { /* for multibyte csid */ DoubleByteToLowerMap *db_tolowermap = INTL_GetDoubleByteToLowerMap(csid); unsigned char *p; int l; for(p = str; *p != 0; p += l) { l = INTL_CharLen(csid ,p); /* *** FIX ME: IMPROVE PERFORMANCE */ switch(l) { case 1: *p = INTL_SingleByteToLower(sb_tolowermap, *p); break; case 2: if(0 == *(p+1)) { /* Check weather we hit partial characters. This happen when we use wrong csid */ /* However, we should not pass array bondary even we use wrong csid */ XP_ASSERT(FALSE); return; /* get partial characters, return */ } INTL_DoubleByteToLower(db_tolowermap, p, p); break; default: { unsigned char *ck; /* Check weather we hit partial characters. This happen when we use wrong csid */ /* However, we should not pass array bondary even we use wrong csid */ for(ck = p+l-1; ck != p ;ck--) { if(0 == *ck) { XP_ASSERT(FALSE); return; /* get partial characters, return */ } } /* We current do not handle 3 byte normalization. 
We need to work on this for UTF8 */ } break; } } } } PRIVATE void INTL_DoubleByteToLower(DoubleByteToLowerMap *db_tolowermap, unsigned char* lowertext, unsigned char* text) { DoubleByteToLowerMap *p; for(p = db_tolowermap; !((p->src_b1 == 0) && (p->src_b2_start == 0)); p++) { if( (p->src_b1 == text[0]) && (p->src_b2_start <= text[1] ) && (p->src_b2_end >= text[1]) ) { lowertext[0] = p->dest_b1; lowertext[1] = text[1] - p->src_b2_start + p->dest_b2_start; return; } else { /* The map have to be sorted order to implement a fast search */ if(p->src_b1 > text[0]) break; else { if((p->src_b1 == text[0]) && (p->src_b2_start > text[1])) break; } } } lowertext[0] = text[0]; lowertext[1] = text[1]; return; } PUBLIC XP_Bool INTL_MatchOneChar(int16 csid, unsigned char *text1,unsigned char *text2,int *charlen) { if((INTL_CharSetType(csid) == SINGLEBYTE) ) { unsigned char *sb_tolowermap; *charlen = 1; sb_tolowermap = INTL_GetSingleByteToLowerMap(csid); return( INTL_SingleByteToLower(sb_tolowermap,text1[0]) == INTL_SingleByteToLower(sb_tolowermap, text2[0])); } else { int l1, l2; l1 = INTL_CharLen(csid ,text1); /* *** FIX ME: IMPROVE PERFORMANCE */ l2 = INTL_CharLen(csid ,text2); /* *** FIX ME: IMPROVE PERFORMANCE */ if(l1 != l2) return FALSE; if(l1 == 1) { unsigned char *sb_tolowermap; *charlen = 1; sb_tolowermap = INTL_GetSingleByteToLowerMap(csid); return( INTL_SingleByteToLower(sb_tolowermap,text1[0]) == INTL_SingleByteToLower(sb_tolowermap, text2[0])); } else { if(l1 == 2) { DoubleByteToLowerMap *db_tolowermap; unsigned char lowertext1[2], lowertext2[2]; *charlen = 2; db_tolowermap = INTL_GetDoubleByteToLowerMap(csid); INTL_DoubleByteToLower(db_tolowermap, lowertext1, text1); INTL_DoubleByteToLower(db_tolowermap, lowertext2, text2); return( ( lowertext1[0] == lowertext2[0] ) && ( lowertext1[1] == lowertext2[1] ) ); } else { /* for character which is neither one byte nor two byte, we cannot ignore case for them */ int i; *charlen = l1; for(i=0;i tmp_len) { tmp_len = rt; if 
(out_string = (unsigned char *) XP_REALLOC((void *) out_string, tmp_len)) rt = strxfrm(out_string, in_string, tmp_len); } /* Return the output of strxfrm if success. */ if (rt != (size_t)-1 && *out_string) out_string = out_string; else XP_FREEIF(out_string); } return out_string; } #endif /* XP_WIN32 */ #if defined(XP_MAC) static char *FEINTL_CreateCollationKeyUsingOS(const char *in_string, int16 wincsid) { char *out_string = NULL; char *temp_string; int in_string_len = XP_STRLEN(in_string); int i; /* Currently only supports Latin1. */ if (wincsid != CS_ASCII && wincsid != CS_LATIN1 && wincsid != CS_DEFAULT) return NULL; /* INTL_ConvertLineWithoutAutoDetect may alter input string. */ temp_string = XP_STRDUP(in_string); if (temp_string != NULL) { /* Convert to MacRoman. */ out_string = (char *) INTL_ConvertLineWithoutAutoDetect (wincsid, CS_MAC_ROMAN, (unsigned char *) temp_string, in_string_len); /* Set the converted string if conversion was applied and the input string was not altered. */ if (out_string != NULL && out_string != temp_string) { XP_FREE(temp_string); temp_string = out_string; } out_string = (char *) XP_ALLOC(in_string_len * 2 + 1); if (out_string != NULL) { /* Copy original string. */ for (i = 0; i < in_string_len; i++) out_string[i*2+1] = temp_string[i]; UppercaseStripDiacritics(temp_string, in_string_len, FontToScript(1)); /* Copy uppercased string. */ for (i = 0; i < in_string_len; i++) out_string[i*2] = temp_string[i]; /* Terminate the string. */ out_string[in_string_len * 2] = '\0'; } XP_FREE(temp_string); } return out_string; } #endif /* XP_MAC */ #if defined(XP_UNIX) static char *FEINTL_CreateCollationKeyUsingOS(const char *in_string, int16 wincsid) { return NULL; } #endif /* XP_UNIX */ static char *INTL_CreateCollationKeyUsingOS(const char *in_string, int16 wincsid) { char *out_string; unsigned char *tmp; /* Create a collatable string */ if (INTL_CharSetType(wincsid) == SINGLEBYTE) { /* Front End call to create a collation key. 
*/ out_string = FEINTL_CreateCollationKeyUsingOS(in_string, wincsid); if (out_string) return out_string; /* Otherwise just lowercase the string. */ out_string = XP_STRDUP(in_string); tmp = (unsigned char *) out_string; while (*tmp) { *tmp = (unsigned char) XP_TO_LOWER((int)*tmp); tmp++; } } else if (wincsid & MULTIBYTE) { out_string = XP_STRDUP(in_string); tmp = (unsigned char *) out_string; while (*tmp) { /* Lower case for Ascii */ if (*tmp < 128) { *tmp = (unsigned char) XP_TO_LOWER((int)*tmp); tmp++; } else { int bytes = INTL_CharLen(wincsid, tmp); /* ShiftJIS specific, shift hankaku kana in front of zenkaku. */ if (wincsid == CS_SJIS) { if (*tmp >= 0xA0 && *tmp < 0xE0) { *tmp -= (0xA0 - 0x81); } else if (*tmp >= 0x81 && *tmp < 0xA0) { *tmp += (0xA0 - 0x81); } } tmp += bytes; } } } return out_string; } #if defined(LIBNLS_COLLATE) static Collation *collation = NULL; static char *INTL_CreateCollationKeyUsingLibNLS(const char *in_string, int16 wincsid) { return NULL; } #endif /* LIBNLS_COLLATE */ /* * Create a collation key using default system locale. */ PUBLIC char *INTL_CreateCollationKeyByDefaultLocale(const char *in_string, int16 wincsid, int32 collation_flag) { char *out_string; /* For future enhancement */ collation_flag = 0; /* CS_DEFAULT is not accepted by i18n unicode converter. * In future, this should be taken care by the caller. */ if (CS_DEFAULT == wincsid) wincsid = CS_LATIN1; /* Create a collation key. */ #if defined(LIBNLS_COLLATE) out_string = INTL_CreateCollationKeyUsingLibNLS(in_string); #else out_string = INTL_CreateCollationKeyUsingOS(in_string, wincsid); #endif return out_string; } /* * Compare two collation keys. */ PUBLIC int INTL_Compare_CollationKey(const char *key1, const char *key2) { return XP_MEMCMP((const void *) key1, (const void *) key2, XP_STRLEN(key1)); } /* * Decode, convert and create a message header. Then create and return a collatable string. 
*/ PUBLIC char *INTL_DecodeMimePartIIAndCreateCollationKey(const char *header, int16 wincsid, int32 collation_flag) { char *temp_string; char *decoded_string; char *out_string; /* For future enhancement */ collation_flag = 0; /* Allocate the temp string because INTL_DecodeMimePartIIStr may alter input string. */ temp_string = XP_STRDUP(header); if (temp_string == NULL) return NULL; /* Decode and Convert */ decoded_string = INTL_DecodeMimePartIIStr(temp_string, wincsid, FALSE); /* Free the temp string. */ if (decoded_string != temp_string) XP_FREE(temp_string); /* No decode or conversion done, allocate for out string. */ if (decoded_string == NULL) decoded_string = XP_STRDUP(header); if (decoded_string == NULL) return NULL; /* Create a collation key. */ out_string = INTL_CreateCollationKeyByDefaultLocale(decoded_string, wincsid, collation_flag); /* Return decoded string in case no collation key created. */ if (out_string != NULL) XP_FREE(decoded_string); else out_string = decoded_string; return out_string; } #endif /* MOZ_MAIL_NEWS */