Proper support for using the font's own character encoding. If we
know what that encoding actually is, we can do our best to support
additional charsets (VT100 linedrawing, SCO ACS, UTF-8 mode) using
the available characters; if we don't, we fall back to a mode where
we disable all Unicode cut-and-paste and assume any Unicode
character is undisplayable.

[originally from svn r2413]
This commit is contained in:
Simon Tatham 2003-01-01 22:25:25 +00:00
Родитель d97ac46223
Коммит cab553028a
6 изменённых файлов: 138 добавлений и 61 удалений

Просмотреть файл

@ -589,7 +589,7 @@ extern char ver[];
#ifndef CP_UTF8
#define CP_UTF8 65001
#endif
void init_ucs(void);
/* void init_ucs(void); -- this is now in platform-specific headers */
int is_dbcs_leadbyte(int codepage, char byte);
int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
wchar_t *wcstr, int wclen);

Просмотреть файл

@ -110,6 +110,10 @@ supported by \fIpterm\fP) should be valid here (examples are
encoding which is valid in an X logical font description should be
valid ("ibm-cp437", for example).
\fIpterm\fP's default behaviour is to use the same character
encoding as its primary font. If you supply a Unicode (iso10646-1)
font, it will default to the UTF-8 character set.
Character set names are case-insensitive.
.IP "\fB\-nethack\fP"
Tells \fIpterm\fP to enable NetHack keypad mode, in which the

Просмотреть файл

@ -836,18 +836,27 @@ gint key_event(GtkWidget *widget, GdkEventKey *event, gpointer data)
printf("\n");
#endif
/*
* The stuff we've just generated is assumed to be
* ISO-8859-1! This sounds insane, but `man XLookupString'
* agrees: strings of this type returned from the X server
* are hardcoded to 8859-1. Strictly speaking we should be
* doing this using some sort of GtkIMContext, which (if
* we're lucky) would give us our data directly in Unicode;
* but that's not supported in GTK 1.2 as far as I can
* tell, and it's poorly documented even in 2.0, so it'll
* have to wait.
*/
lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1);
if (inst->fontinfo[0].charset != CS_NONE) {
/*
* The stuff we've just generated is assumed to be
* ISO-8859-1! This sounds insane, but `man
* XLookupString' agrees: strings of this type returned
* from the X server are hardcoded to 8859-1. Strictly
* speaking we should be doing this using some sort of
* GtkIMContext, which (if we're lucky) would give us
* our data directly in Unicode; but that's not
* supported in GTK 1.2 as far as I can tell, and it's
* poorly documented even in 2.0, so it'll have to
* wait.
*/
lpage_send(inst->ldisc, CS_ISO8859_1, output+start, end-start, 1);
} else {
/*
* In direct-to-font mode, we just send the string
* exactly as we received it.
*/
ldisc_send(inst->ldisc, output+start, end-start, 1);
}
show_mouseptr(inst, 0);
term_seen_key_event(inst->term);
@ -1218,17 +1227,25 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
if (inst->pasteout_data_utf8)
sfree(inst->pasteout_data_utf8);
inst->pasteout_data_utf8 = smalloc(len*6);
inst->pasteout_data_utf8_len = len*6;
{
/*
* Set up UTF-8 paste data. This only happens if we aren't in
* direct-to-font mode using the D800 hack.
*/
if (inst->fontinfo[0].charset != CS_NONE) {
wchar_t *tmp = data;
int tmplen = len;
inst->pasteout_data_utf8 = smalloc(len*6);
inst->pasteout_data_utf8_len = len*6;
inst->pasteout_data_utf8_len =
charset_from_unicode(&tmp, &tmplen, inst->pasteout_data_utf8,
inst->pasteout_data_utf8_len,
CS_UTF8, NULL, NULL, 0);
inst->pasteout_data_utf8 =
srealloc(inst->pasteout_data_utf8, inst->pasteout_data_utf8_len);
} else {
inst->pasteout_data_utf8 = NULL;
inst->pasteout_data_utf8_len = 0;
}
inst->pasteout_data = smalloc(len);
@ -1243,8 +1260,9 @@ void write_clip(void *frontend, wchar_t * data, int len, int must_deselect)
GDK_SELECTION_TYPE_STRING, 1);
gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
inst->compound_text_atom, 1);
gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
inst->utf8_string_atom, 1);
if (inst->pasteout_data_utf8)
gtk_selection_add_target(inst->area, GDK_SELECTION_PRIMARY,
inst->utf8_string_atom, 1);
}
}
@ -1286,15 +1304,24 @@ void request_paste(void *frontend)
* comes back _then_ we can call term_do_paste().
*/
/*
* First we attempt to retrieve the selection as a UTF-8 string
* (which we will convert to the correct code page before
* sending to the session, of course). If that fails,
* selection_received() will be informed and will fall back to
* an ordinary string.
*/
gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
inst->utf8_string_atom, GDK_CURRENT_TIME);
if (inst->fontinfo[0].charset != CS_NONE) {
/*
* First we attempt to retrieve the selection as a UTF-8
* string (which we will convert to the correct code page
* before sending to the session, of course). If that
* fails, selection_received() will be informed and will
* fall back to an ordinary string.
*/
gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
inst->utf8_string_atom, GDK_CURRENT_TIME);
} else {
/*
* If we're in direct-to-font mode, we disable UTF-8
* pasting, and go straight to ordinary string data.
*/
gtk_selection_convert(inst->area, GDK_SELECTION_PRIMARY,
GDK_SELECTION_TYPE_STRING, GDK_CURRENT_TIME);
}
}
gint idle_paste_func(gpointer data); /* forward ref */
@ -1562,12 +1589,9 @@ void do_text_internal(Context ctx, int x, int y, char *text, int len,
gwcs, len*2);
sfree(gwcs);
} else {
wchar_t *wcstmp = wcs;
int lentmp = len;
gcs = smalloc(sizeof(GdkWChar) * (len+1));
charset_from_unicode(&wcstmp, &lentmp, gcs, len,
inst->fontinfo[fontid].charset,
NULL, ".", 1);
wc_to_mb(inst->fontinfo[fontid].charset, 0,
wcs, len, gcs, len, ".", NULL);
gdk_draw_text(inst->pixmap, inst->fonts[fontid], gc,
x*inst->font_width+cfg.window_border,
y*inst->font_height+cfg.window_border+inst->fonts[0]->ascent,
@ -2101,13 +2125,20 @@ static void block_signal(int sig, int block_it) {
}
}
static void set_font_info(struct gui_data *inst, int fontid)
/*
* This function retrieves the character set encoding of a font. It
* returns the character set without the X11 hack (in case the user
* asks to use the font's own encoding).
*/
static int set_font_info(struct gui_data *inst, int fontid)
{
GdkFont *font = inst->fonts[fontid];
XFontStruct *xfs = GDK_FONT_XFONT(font);
Display *disp = GDK_FONT_XDISPLAY(font);
Atom charset_registry, charset_encoding;
unsigned long registry_ret, encoding_ret;
int retval = CS_NONE;
charset_registry = XInternAtom(disp, "CHARSET_REGISTRY", False);
charset_encoding = XInternAtom(disp, "CHARSET_ENCODING", False);
inst->fontinfo[fontid].charset = CS_NONE;
@ -2119,10 +2150,13 @@ static void set_font_info(struct gui_data *inst, int fontid)
enc = XGetAtomName(disp, (Atom)encoding_ret);
if (reg && enc) {
char *encoding = dupcat(reg, "-", enc, NULL);
inst->fontinfo[fontid].charset = charset_from_xenc(encoding);
retval = inst->fontinfo[fontid].charset =
charset_from_xenc(encoding);
/* FIXME: when libcharset supports wide encodings fix this. */
if (!strcasecmp(encoding, "iso10646-1"))
if (!strcasecmp(encoding, "iso10646-1")) {
inst->fontinfo[fontid].is_wide = 1;
retval = CS_UTF8;
}
/*
* Hack for X line-drawing characters: if the primary
@ -2148,19 +2182,11 @@ static void set_font_info(struct gui_data *inst, int fontid)
inst->fontinfo[fontid].charset = CS_ISO8859_1_X11;
}
/*
* FIXME: this is a hack. Currently fonts with
* incomprehensible encodings are dealt with by
* pretending they're 8859-1. It's ugly, but it's good
* enough to stop things crashing. Should do something
* better here.
*/
if (inst->fontinfo[fontid].charset == CS_NONE)
inst->fontinfo[fontid].charset = CS_ISO8859_1;
sfree(encoding);
}
}
return retval;
}
int main(int argc, char **argv)
@ -2168,6 +2194,7 @@ int main(int argc, char **argv)
extern int pty_master_fd; /* declared in pty.c */
extern void pty_pre_init(void); /* declared in pty.c */
struct gui_data *inst;
int font_charset;
/* defer any child exit handling until we're ready to deal with
* it */
@ -2195,7 +2222,7 @@ int main(int argc, char **argv)
fprintf(stderr, "pterm: unable to load font \"%s\"\n", cfg.font);
exit(1);
}
set_font_info(inst, 0);
font_charset = set_font_info(inst, 0);
if (cfg.boldfont[0]) {
inst->fonts[1] = gdk_font_load(cfg.boldfont);
if (!inst->fonts[1]) {
@ -2233,7 +2260,7 @@ int main(int argc, char **argv)
inst->compound_text_atom = gdk_atom_intern("COMPOUND_TEXT", FALSE);
inst->utf8_string_atom = gdk_atom_intern("UTF8_STRING", FALSE);
init_ucs();
init_ucs(font_charset);
inst->window = gtk_window_new(GTK_WINDOW_TOPLEVEL);

Просмотреть файл

@ -66,4 +66,9 @@ int next_socket(int *state, int *rwx);
/* BSD-semantics version of signal() */
void (*putty_signal(int sig, void (*func)(int)))(int);
/*
* Exports from unicode.c.
*/
void init_ucs(int font_charset);
#endif

Просмотреть файл

@ -40,6 +40,17 @@ int mb_to_wc(int codepage, int flags, char *mbstr, int mblen,
setlocale(LC_CTYPE, "C");
return n;
} else if (codepage == CS_NONE) {
int n = 0;
while (mblen > 0) {
wcstr[n] = 0xD800 | (mbstr[0] & 0xFF);
n++;
mbstr++;
mblen--;
}
return n;
} else
return charset_to_unicode(&mbstr, &mblen, wcstr, wclen, codepage,
@ -73,12 +84,24 @@ int wc_to_mb(int codepage, int flags, wchar_t *wcstr, int wclen,
setlocale(LC_CTYPE, "C");
return n;
} else
} else if (codepage == CS_NONE) {
int n = 0;
while (wclen > 0 && n < mblen) {
if (*wcstr >= 0xD800 && *wcstr < 0xD900)
mbstr[n++] = (*wcstr & 0xFF);
else if (defchr)
mbstr[n++] = *defchr;
wcstr++;
wclen--;
}
return n;
} else {
return charset_from_unicode(&wcstr, &wclen, mbstr, mblen, codepage,
NULL, NULL, 0);
}
}
void init_ucs(void)
void init_ucs(int font_charset)
{
int i;
@ -97,14 +120,16 @@ void init_ucs(void)
line_codepage = charset_from_mimeenc(cfg.line_codepage);
if (line_codepage == CS_NONE)
line_codepage = charset_from_xenc(cfg.line_codepage);
/* If it's still CS_NONE, we should assume direct-to-font. */
/* FIXME: this is a hack. Currently fonts with incomprehensible
* encodings are dealt with by pretending they're 8859-1. It's
* ugly, but it's good enough to stop things crashing. Should do
* something better here. */
/*
* If line_codepage is _still_ CS_NONE, we assume we're using
* the font's own encoding. This has been passed in to us, so
* we use that. If it's still CS_NONE after _that_ - i.e. the
* font we were given had an incomprehensible charset - then we
* fall back to using the D800 page.
*/
if (line_codepage == CS_NONE)
line_codepage = CS_ISO8859_1;
line_codepage = font_charset;
/*
* Set up unitab_line, by translating each individual character
@ -117,7 +142,10 @@ void init_ucs(void)
c[0] = i;
p = c;
len = 1;
if (1 == charset_to_unicode(&p,&len,wc,1,line_codepage,NULL,L"",0))
if (line_codepage == CS_NONE)
unitab_line[i] = 0xD800 | i;
else if (1 == charset_to_unicode(&p, &len, wc, 1, line_codepage,
NULL, L"", 0))
unitab_line[i] = wc[0];
else
unitab_line[i] = 0xFFFD;
@ -157,17 +185,25 @@ void init_ucs(void)
c[0] = i;
p = c;
len = 1;
if (1 == charset_to_unicode(&p,&len,wc,1,CS_CP437,NULL,L"",0))
if (1 == charset_to_unicode(&p, &len, wc, 1, CS_CP437, NULL, L"", 0))
unitab_scoacs[i] = wc[0];
else
unitab_scoacs[i] = 0xFFFD;
}
/* Find the line control characters. */
for (i = 0; i < 256; i++)
if (unitab_line[i] < ' '
|| (unitab_line[i] >= 0x7F && unitab_line[i] < 0xA0))
/*
* Find the control characters in the line codepage. For
* direct-to-font mode using the D800 hack, we assume 00-1F and
* 7F are controls, but allow 80-9F through. (It's as good a
* guess as anything; and my bet is that half the weird fonts
* used in this way will be IBM or MS code pages anyway.)
*/
for (i = 0; i < 256; i++) {
int lineval = unitab_line[i];
if (lineval < ' ' || (lineval >= 0x7F && lineval < 0xA0) ||
(lineval >= 0xD800 && lineval < 0xD820) || (lineval == 0xD87F))
unitab_ctrl[i] = i;
else
unitab_ctrl[i] = 0xFF;
}
}

Просмотреть файл

@ -189,4 +189,9 @@ void force_normal(HWND hwnd);
void UpdateSizeTip(HWND src, int cx, int cy);
void EnableSizeTip(int bEnable);
/*
* Exports from unicode.c.
*/
void init_ucs(void);
#endif