// Mirror of https://github.com/github/ruby.git
// 85 lines, 2.2 KiB, C
#include "yarp/enc/yp_encoding.h"
|
||
|
|
||
|
// A decoded GBK character: the raw byte for single-byte characters, or
// (lead << 8 | trail) for double-byte characters.
typedef uint16_t gbk_codepoint_t;

/**
 * Decode the GBK character that starts at `c`.
 *
 * @param c     Pointer to the first byte of the character. The second byte
 *              (c[1]) is only inspected when the first byte falls inside one
 *              of the lead-byte ranges below, so in that case at least two
 *              bytes must be readable.
 * @param width Out-parameter. Set to 1 for a single-byte character, 2 for a
 *              recognized double-byte character, and 0 when the bytes do not
 *              form a character recognized by this function.
 * @return The byte value for single-byte characters, (c[0] << 8 | c[1]) for
 *         double-byte characters, or 0 when `*width` is set to 0.
 */
static gbk_codepoint_t
gbk_codepoint(const char *c, size_t *width) {
    // Work on unsigned bytes so the range comparisons below are not affected
    // by the implementation-defined signedness of plain `char`.
    const unsigned char *uc = (const unsigned char *) c;

    // These are the single byte characters.
    if (*uc < 0x80) {
        *width = 1;
        return *uc;
    }

    // These are the double byte characters. The lead-byte test comes first in
    // every clause so that uc[1] is only read for a plausible lead byte.
    if (
        ((uc[0] >= 0xA1 && uc[0] <= 0xA9) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/1
        ((uc[0] >= 0xB0 && uc[0] <= 0xF7) && (uc[1] >= 0xA1 && uc[1] <= 0xFE)) || // GBK/2
        ((uc[0] >= 0x81 && uc[0] <= 0xA0) && (uc[1] >= 0x40 && uc[1] <= 0xFE) && (uc[1] != 0x7F)) || // GBK/3
        ((uc[0] >= 0xAA && uc[0] <= 0xFE) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) || // GBK/4
        ((uc[0] >= 0xA8 && uc[0] <= 0xA9) && (uc[1] >= 0x40 && uc[1] <= 0xA0) && (uc[1] != 0x7F)) // GBK/5
    ) {
        *width = 2;
        return (gbk_codepoint_t) (uc[0] << 8 | uc[1]);
    }

    // Anything else is not a character this decoder recognizes.
    *width = 0;
    return 0;
}
|
||
|
|
||
|
/**
 * Report how many bytes the GBK character starting at `c` occupies.
 *
 * @param c Pointer to the first byte of the character.
 * @return The width that gbk_codepoint() stores for `c`; the decoded
 *         codepoint itself is discarded.
 */
static size_t
yp_encoding_gbk_char_width(const char *c) {
    size_t width;
    gbk_codepoint(c, &width);

    return width;
}
|
||
|
|
||
|
static size_t
|
||
|
yp_encoding_gbk_alpha_char(const char *c) {
|
||
|
size_t width;
|
||
|
gbk_codepoint_t codepoint = gbk_codepoint(c, &width);
|
||
|
|
||
|
if (width == 1) {
|
||
|
const char value = (const char) codepoint;
|
||
|
return yp_encoding_ascii_alpha_char(&value);
|
||
|
} else {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static size_t
|
||
|
yp_encoding_gbk_alnum_char(const char *c) {
|
||
|
size_t width;
|
||
|
gbk_codepoint_t codepoint = gbk_codepoint(c, &width);
|
||
|
|
||
|
if (width == 1) {
|
||
|
const char value = (const char) codepoint;
|
||
|
return yp_encoding_ascii_alnum_char(&value);
|
||
|
} else {
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static bool
|
||
|
yp_encoding_gbk_isupper_char(const char *c) {
|
||
|
size_t width;
|
||
|
gbk_codepoint_t codepoint = gbk_codepoint(c, &width);
|
||
|
|
||
|
if (width == 1) {
|
||
|
const char value = (const char) codepoint;
|
||
|
return yp_encoding_ascii_isupper_char(&value);
|
||
|
} else {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
yp_encoding_t yp_encoding_gbk = {
|
||
|
.name = "gbk",
|
||
|
.char_width = yp_encoding_gbk_char_width,
|
||
|
.alnum_char = yp_encoding_gbk_alnum_char,
|
||
|
.alpha_char = yp_encoding_gbk_alpha_char,
|
||
|
.isupper_char = yp_encoding_gbk_isupper_char
|
||
|
};
|