I implemented real UTF-8 handling in ELinks. It stores each character as a two-byte Unicode value; compared with the current state, this increases memory consumption by 33% (or by 25% in 256-color mode). Characters are encoded this way only in UTF-8 mode. In standard mode there is no change. I know that UTF-8 mode could be made optional, but the patch is big enough already. When a chunk of data breaks in the middle of a UTF-8 character, some garbage may appear on the screen. I have no idea how to avoid it. In plain-text mode, lines with UTF-8 characters are shorter (easier to read :)). It's too complicated to get that right. Anyway, I think it is much better than what we have now. You can test it with xterm -en utf8 -u8 or on the console. $ unicode_start LatArCyrHeb-16 $ loadkeys --unicode Set the terminal charset to UTF-8 and enable UTF-8 I/O. I have tested it for a few days and it worked. Merry Christmas and happy New Year! Witek diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/bfu/button.c elinks.2/src/bfu/button.c --- elinks/src/bfu/button.c 2005-12-02 10:57:10.646774500 +0100 +++ elinks.2/src/bfu/button.c 2005-12-22 17:54:00.783027000 +0100 @@ -167,6 +167,31 @@ attr = get_opt_bool("ui.dialogs.underline_button_shortcuts") ? 
SCREEN_ATTR_UNDERLINE : 0; + if (term->utf8) { + unsigned char *text2 = text; + unsigned char *end = text + + widget_data->widget->info.button.truetextlen; + int hk_state = 0; + int x1; + + for (x1 = 0; x1 - !!hk_state < len && *text2; x1++) { + uint16_t data; + + data = (uint16_t)utf_8_to_unicode(&text2, end); + if (!hk_state && (int)(text2 - text) == hk_pos + 1) { + hk_state = 1; + continue; + } + if (hk_state == 1) { + draw_char(term, x + x1 - 1, pos->y, data, attr, shortcut_color); + hk_state = 2; + } else { + draw_char(term, x + x1 - !!hk_state, pos->y, data, 0, color); + } + + } + len = x1 - !!hk_state; + } else if (hk_pos >= 0) { int right = widget_data->widget->info.button.truetextlen - hk_pos - 1; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/bfu/dialog.c elinks.2/src/bfu/dialog.c --- elinks/src/bfu/dialog.c 2005-12-02 10:57:10.650774750 +0100 +++ elinks.2/src/bfu/dialog.c 2005-12-22 17:54:00.791027500 +0100 @@ -12,6 +12,7 @@ #include "bfu/dialog.h" #include "config/kbdbind.h" #include "config/options.h" +#include "intl/charsets.h" #include "intl/gettext/libintl.h" #include "terminal/draw.h" #include "main/timer.h" @@ -94,13 +95,18 @@ title_color = get_bfu_color(term, "dialog.title"); if (title_color && box.width > 2) { unsigned char *title = dlg_data->dlg->title; - int titlelen = int_min(box.width - 2, strlen(title)); - int x = (box.width - titlelen) / 2 + box.x; + unsigned char *t2 = title; + int titlelen = strlen(title); + int len = term->utf8 ? 
strlen_utf8(&t2) : titlelen; +#if 1 + len = int_min(box.width - 2, len); +#endif + int x = (box.width - len) / 2 + box.x; int y = box.y - 1; draw_text(term, x - 1, y, " ", 1, 0, title_color); draw_text(term, x, y, title, titlelen, 0, title_color); - draw_text(term, x + titlelen, y, " ", 1, 0, title_color); + draw_text(term, x + len, y, " ", 1, 0, title_color); } } diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/bfu/inpfield.c elinks.2/src/bfu/inpfield.c --- elinks/src/bfu/inpfield.c 2005-12-02 10:57:10.650774750 +0100 +++ elinks.2/src/bfu/inpfield.c 2005-12-22 17:54:00.799028000 +0100 @@ -17,6 +17,7 @@ #include "bfu/msgbox.h" #include "bfu/text.h" #include "config/kbdbind.h" +#include "intl/charsets.h" #include "intl/gettext/libintl.h" #include "osdep/osdep.h" #include "session/session.h" @@ -101,6 +102,71 @@ return EVENT_NOT_PROCESSED; } +#if 0 +void +dlg_format_field(struct terminal *term, + struct widget_data *widget_data, + int x, int *y, int w, int *rw, enum format_align align) +{ + static int max_label_width; + static int *prev_y; /* Assert the uniqueness of y */ /* TODO: get rid of this !! --Zas */ + unsigned char *label = widget_data->widget->text; + struct color_pair *text_color = NULL; + int label_width = 0; + int float_label = widget_data->widget->info.field.flags & (INPFIELD_FLOAT|INPFIELD_FLOAT2); + + if (label && *label && float_label) { + unsigned char *l2 = label; + int len = strlen(label); + + label_width = term->utf8 ? 
strlen_utf8(&l2) : len; + if (prev_y == y) { + int_lower_bound(&max_label_width, label_width); + } else { + max_label_width = label_width; + prev_y = y; + } + + /* Right align the floating label up against the + * input field */ + x += max_label_width - label_width; + w -= max_label_width - len; + } + + if (label && *label) { + if (term) text_color = get_bfu_color(term, "dialog.text"); + + dlg_format_text_do(term, label, x, y, w, rw, text_color, ALIGN_LEFT); + } + + /* XXX: We want the field and label on the same line if the terminal + * width allows it. */ + if (label && *label && float_label) { + if (widget_data->widget->info.field.flags & INPFIELD_FLOAT) { + (*y) -= INPUTFIELD_HEIGHT; + dlg_format_text_do(term, INPUTFIELD_FLOAT_SEPARATOR, + x + label_width, y, w, rw, + text_color, ALIGN_LEFT); + w -= INPUTFIELD_FLOAT_SEPARATOR_LEN + INPUTFIELD_FLOATLABEL_PADDING; + x += INPUTFIELD_FLOAT_SEPARATOR_LEN + INPUTFIELD_FLOATLABEL_PADDING; + } + + /* FIXME: Is 5 chars for input field enough? 
--jonas */ + if (label_width < w - 5) { + (*y) -= INPUTFIELD_HEIGHT; + w -= label_width; + x += label_width; + } + } + + if (rw) int_lower_bound(rw, int_min(w, DIALOG_MIN_WIDTH)); + + set_box(&widget_data->box, x, *y, w, INPUTFIELD_HEIGHT); + + (*y) += INPUTFIELD_HEIGHT; +} +#endif + void dlg_format_field(struct terminal *term, struct widget_data *widget_data, @@ -161,6 +227,7 @@ (*y) += INPUTFIELD_HEIGHT; } + static widget_handler_status_T input_field_cancel(struct dialog_data *dlg_data, struct widget_data *widget_data) { @@ -257,6 +324,7 @@ check, fn, cancelfn); } + static widget_handler_status_T display_field_do(struct dialog_data *dlg_data, struct widget_data *widget_data, int hide) @@ -264,11 +332,30 @@ struct terminal *term = dlg_data->win->term; struct color_pair *color; int sel = is_selected_widget(dlg_data, widget_data); + int len = 0, left = 0; - int_bounds(&widget_data->info.field.vpos, + if (term->utf8) { + unsigned char *t = widget_data->cdata; + unsigned char *t2 = t; + int p = widget_data->info.field.cpos; + unsigned char tmp = t[p]; + int x; + + t[p] = '\0'; + len = strlen_utf8(&t2); + int_bounds(&left, len - widget_data->box.width + 1, len); + int_lower_bound(&left, 0); + for (t2 = t, x = 0; x < left; x++) { + utf_8_to_unicode(&t2, &t[p]); + } + t[p] = tmp; + widget_data->info.field.vpos = (int)(t2 - t); + } else { + int_bounds(&widget_data->info.field.vpos, widget_data->info.field.cpos - widget_data->box.width + 1, widget_data->info.field.cpos); - int_lower_bound(&widget_data->info.field.vpos, 0); + int_lower_bound(&widget_data->info.field.vpos, 0); + } color = get_bfu_color(term, "dialog.field"); if (color) @@ -276,7 +363,8 @@ color = get_bfu_color(term, "dialog.field-text"); if (color) { - int len = strlen(widget_data->cdata + widget_data->info.field.vpos); + unsigned char *text = widget_data->cdata + widget_data->info.field.vpos; + int len = strlen(text); int w = int_min(len, widget_data->box.width); if (!hide) { @@ -286,6 +374,9 @@ } else { 
struct box box; + if (term->utf8) len = strlen_utf8(&text); + w = int_min(len, widget_data->box.width); + copy_box(&box, &widget_data->box); box.width = w; @@ -294,8 +385,14 @@ } if (sel) { - int x = widget_data->box.x + widget_data->info.field.cpos - widget_data->info.field.vpos; + int x; + + if (term->utf8) { + x = widget_data->box.x + len - left; + } else { + x = widget_data->box.x + widget_data->info.field.cpos - widget_data->info.field.vpos; + } set_cursor(term, x, widget_data->box.y, 0); set_window_ptr(dlg_data->win, widget_data->box.x, widget_data->box.y); } @@ -434,13 +531,33 @@ break; case ACT_EDIT_RIGHT: - if (widget_data->info.field.cpos < strlen(widget_data->cdata)) - widget_data->info.field.cpos++; + if (widget_data->info.field.cpos < strlen(widget_data->cdata)) { + if (term->utf8) { + unsigned char *next = widget_data->cdata + widget_data->info.field.cpos; + unsigned char *end = strchr(next, '\0'); + + utf_8_to_unicode(&next, end); + widget_data->info.field.cpos = (int)(next - widget_data->cdata); + } else + widget_data->info.field.cpos++; + } goto display_field; case ACT_EDIT_LEFT: if (widget_data->info.field.cpos > 0) widget_data->info.field.cpos--; + if (widget_data->info.field.cpos && term->utf8) { + unsigned char *t = widget_data->cdata; + unsigned char *t2 = t; + int p = widget_data->info.field.cpos; + unsigned char tmp = t[p]; + + t[p] = '\0'; + strlen_utf8(&t2); + t[p] = tmp; + widget_data->info.field.cpos = (int)(t2 - t); + + } goto display_field; case ACT_EDIT_HOME: @@ -452,6 +569,19 @@ goto display_field; case ACT_EDIT_BACKSPACE: + if (widget_data->info.field.cpos && term->utf8) { + unsigned char *t = widget_data->cdata; + unsigned char *t2 = t; + int p = widget_data->info.field.cpos - 1; + unsigned char tmp = t[p]; + + t[p] = '\0'; + strlen_utf8(&t2); + t[p] = tmp; + memmove(t2, &t[p + 1], strlen(&t[p + 1]) + 1); + widget_data->info.field.cpos = (int)(t2 - t); + goto display_field; + } if (widget_data->info.field.cpos) { 
memmove(widget_data->cdata + widget_data->info.field.cpos - 1, widget_data->cdata + widget_data->info.field.cpos, @@ -466,6 +596,15 @@ if (widget_data->info.field.cpos >= cdata_len) goto display_field; + if (term->utf8) { + unsigned char *next = widget_data->cdata + widget_data->info.field.cpos; + unsigned char *dest = next; + unsigned char *end = strchr(next, '\0'); + + utf_8_to_unicode(&next, end); + memmove(dest, next, strlen(next) + 1); + goto display_field; + } memmove(widget_data->cdata + widget_data->info.field.cpos, widget_data->cdata + widget_data->info.field.cpos + 1, cdata_len - widget_data->info.field.cpos + 1); @@ -536,6 +675,7 @@ default: if (check_kbd_textinput_key(ev)) { unsigned char *text = widget_data->cdata; + int textlen = strlen(text); if (textlen >= widget_data->widget->datalen - 1) @@ -547,7 +687,20 @@ memmove(text + 1, text, textlen + 1); *text = get_kbd_key(ev); - + if (term->utf8) { + static unsigned char buf[7]; + unsigned char *t = buf; + static int i = 0; + unicode_val_T data; + + buf[i++] = *text; + buf[i] = '\0'; + data = utf_8_to_unicode(&t, buf + i); + if (i == 6) i = 0; + if (data == UCS_NO_CHAR) + return EVENT_PROCESSED; + else i = 0; + } goto display_field; } } diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/bfu/menu.c elinks.2/src/bfu/menu.c --- elinks/src/bfu/menu.c 2005-12-22 17:44:54.552889750 +0100 +++ elinks.2/src/bfu/menu.c 2005-12-22 17:54:00.827029750 +0100 @@ -346,10 +346,12 @@ struct color_pair *hk_color_sel = get_bfu_color(term, "menu.hotkey.selected"); enum screen_char_attr hk_attr = get_opt_bool("ui.dialogs.underline_hotkeys") ? SCREEN_ATTR_UNDERLINE : 0; + unsigned char *text2, *end; unsigned char c; int xbase = x + L_TEXT_SPACE; int w = width - (L_TEXT_SPACE + R_TEXT_SPACE); int hk_state = 0; + #ifdef CONFIG_DEBUG /* For redundant hotkeys highlighting. 
*/ int double_hk = 0; @@ -366,6 +368,7 @@ hk_color_sel = tmp; } + if (term->utf8) goto utf8; for (x = 0; x - !!hk_state < w && (c = text[x]); x++) { if (!hk_state && x == hotkey_pos - 1) { hk_state = 1; @@ -384,6 +387,32 @@ draw_char(term, xbase + x - !!hk_state, y, c, 0, color); } } + return; +utf8: + end = strchr(text, '\0'); + text2 = text; + for (x = 0; x - !!hk_state < w && *text2; x++) { + unicode_val_T data; + + data = utf_8_to_unicode(&text2, end); + if (!hk_state && (int)(text2 - text) == hotkey_pos) { + hk_state = 1; + continue; + } + if (hk_state == 1) { +#ifdef CONFIG_DEBUG + draw_char(term, xbase + x - 1, y, data, hk_attr, + (double_hk ? hk_color_sel : hk_color)); +#else + draw_char(term, xbase + x - 1, y, data, hk_attr, hk_color); +#endif + hk_state = 2; + } else { + draw_char(term, xbase + x - !!hk_state, y, data, 0, color); + } + + } + } static inline void diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/dialogs/options.c elinks.2/src/dialogs/options.c --- elinks/src/dialogs/options.c 2005-12-02 10:57:10.674776250 +0100 +++ elinks.2/src/dialogs/options.c 2005-12-22 17:54:00.835030250 +0100 @@ -59,7 +59,6 @@ unsigned char *name = get_cp_name(i); if (!name) break; - if (is_cp_special(i)) continue; add_to_menu(&mi, name, NULL, ACT_MAIN_NONE, display_codepage, get_cp_mime_name(i), 0); diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/document/dom/renderer.c elinks.2/src/document/dom/renderer.c --- elinks/src/document/dom/renderer.c 2005-12-22 17:44:54.556890000 +0100 +++ elinks.2/src/document/dom/renderer.c 2005-12-22 17:54:00.851031250 +0100 @@ -192,8 +192,11 @@ struct document *document = renderer->document; struct conv_table *convert = renderer->convert_table; enum convert_string_mode mode = renderer->convert_mode; + int utf8 = document->options.utf8; + unsigned char *end, *text; int x; + assert(renderer && template && string && length); string = convert_string(convert, string, length, document->options.cp, @@ -207,6 +210,7 @@ 
add_search_node(renderer, length); + if (utf8) goto utf_8; for (x = 0; x < length; x++, renderer->canvas_x++) { unsigned char data = string[x]; @@ -235,7 +239,42 @@ copy_screen_chars(POS(renderer), template, 1); } + goto end; +utf_8: + end = string + length; + for (text = string; text < end; renderer->canvas_x++) { + unsigned char data = *text; + unicode_val_T d2; + + /* This is mostly to be able to break out so the indentation + * level won't get to high. */ + switch (data) { + case ASCII_TAB: + { + int tab_width = 7 - (X(renderer) & 7); + int width = WIDTH(renderer, end - text + tab_width); + + template->data = ' '; + + if (!realloc_line(document, width, Y(renderer))) + break; + + /* Only loop over the expanded tab chars and let the + * ``main loop'' add the actual tab char. */ + for (; tab_width-- > 0; renderer->canvas_x++) + copy_screen_chars(POS(renderer), template, 1); + text++; + break; + } + default: + d2 = utf_8_to_unicode(&text, end); + if (d2 == UCS_NO_CHAR) text++; + template->data = (uint16_t)d2; + } + copy_screen_chars(POS(renderer), template, 1); + } +end: mem_free(string); } @@ -610,6 +649,8 @@ init_dom_renderer(&renderer, document, buffer, convert_table); document->bgcolor = document->options.default_bg; + document->options.utf8 = + (codepages[document->options.cp & ~SYSTEM_CHARSET_FLAG].table == table_utf_8); if (cached->content_type && !strlcasecmp("application/rss+xml", 19, cached->content_type, -1)) diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/document/html/renderer.c elinks.2/src/document/html/renderer.c --- elinks/src/document/html/renderer.c 2005-12-02 10:57:10.762781750 +0100 +++ elinks.2/src/document/html/renderer.c 2005-12-22 17:54:00.859031750 +0100 @@ -23,6 +23,7 @@ #include "document/refresh.h" #include "document/renderer.h" #include "intl/charsets.h" +#include "osdep/types.h" #include "protocol/uri.h" #include "session/session.h" #include "terminal/color.h" @@ -156,7 +157,6 @@ if (!realloc_lines(document, y)) return -1; - line 
= &document->data[y]; if (length < line->length) @@ -404,7 +404,7 @@ /* First possibly do the format change and then find out what coordinates * to use since sub- or superscript might change them */ -static inline void +static inline int set_hline(struct html_context *html_context, unsigned char *chars, int charslen, enum link_state link_state) { @@ -413,34 +413,83 @@ link_state); int x = part->cx; int y = part->cy; + int x2 = x; + int len = charslen; + int utf8 = html_context->options->utf8; assert(part); - if_assert_failed return; + if_assert_failed return len; + + assert(charslen >= 0); if (realloc_spaces(part, x + charslen)) - return; + return len; if (part->document) { if (realloc_line(html_context, part->document, Y(y), X(x) + charslen - 1)) - return; - - for (; charslen > 0; charslen--, x++, chars++) { - if (*chars == NBSP_CHAR) { - schar->data = ' '; - part->spaces[x] = html_context->options->wrap_nbsp; - } else { - part->spaces[x] = (*chars == ' '); - schar->data = *chars; + return len; + if (utf8) { + unsigned char *end; + + for (end = chars + charslen; chars < end; x++) { + if (*chars == NBSP_CHAR) { + schar->data = ' '; + part->spaces[x] = html_context->options->wrap_nbsp; + chars++; + } else { + unicode_val_T data; + + part->spaces[x] = (*chars == ' '); + data = utf_8_to_unicode(&chars, end); + if (data == UCS_NO_CHAR) { + /* HR */ + unsigned char attr = schar->attr; + + schar->data = *chars++; + schar->attr = SCREEN_ATTR_FRAME; + copy_screen_chars(&POS(x, y), schar, 1); + schar->attr = attr; + continue; + } else { + schar->data = (uint16_t)data; + } + } + copy_screen_chars(&POS(x, y), schar, 1); + } + } else { + for (; charslen > 0; charslen--, x++, chars++) { + if (*chars == NBSP_CHAR) { + schar->data = ' '; + part->spaces[x] = html_context->options->wrap_nbsp; + } else { + part->spaces[x] = (*chars == ' '); + schar->data = *chars; + } + copy_screen_chars(&POS(x, y), schar, 1); } - copy_screen_chars(&POS(x, y), schar, 1); } + len = x - x2; } else { - 
for (; charslen > 0; charslen--, x++, chars++) { - part->spaces[x] = (*chars == ' '); + if (utf8) { + unsigned char *end; + + for (end = chars + charslen; chars < end; x++) { + unicode_val_T data; + + part->spaces[x] = (*chars == ' '); + data = utf_8_to_unicode(&chars, end); + if (data == UCS_NO_CHAR) chars++; + } + len = x - x2; + } else { + for (; charslen > 0; charslen--, x++, chars++) { + part->spaces[x] = (*chars == ' '); + } } } + return len; } static void @@ -1163,7 +1212,7 @@ static inline void process_link(struct html_context *html_context, enum link_state link_state, - unsigned char *chars, int charslen) + unsigned char *chars, int charslen, int utf8_len) { struct part *part = html_context->part; struct link *link; @@ -1215,6 +1264,7 @@ if (x_offset) { charslen -= x_offset; chars += x_offset; + utf8_len -= x_offset; } link = new_link(html_context, chars, charslen); @@ -1229,14 +1279,14 @@ } /* Add new canvas positions to the link. */ - if (realloc_points(link, link->npoints + charslen)) { + if (realloc_points(link, link->npoints + utf8_len)) { struct point *point = &link->points[link->npoints]; int x = X(part->cx) + x_offset; int y = Y(part->cy); - link->npoints += charslen; + link->npoints += utf8_len; - for (; charslen > 0; charslen--, point++, x++) { + for (; utf8_len > 0; utf8_len--, point++, x++) { point->x = x; point->y = y; } @@ -1288,6 +1338,7 @@ enum link_state link_state; int update_after_subscript = renderer_context.subscript; struct part *part; + int utf8_len; assert(html_context); if_assert_failed return; @@ -1346,9 +1397,7 @@ else if (html_context->options->links_numbering) put_link_number(html_context); } - - set_hline(html_context, chars, charslen, link_state); - + utf8_len = set_hline(html_context, chars, charslen, link_state); if (link_state != LINK_STATE_NONE) { #define is_drawing_subs_or_sups() \ @@ -1368,15 +1417,15 @@ } #undef is_drawing_subs_or_sups - - process_link(html_context, link_state, chars, charslen); + 
process_link(html_context, link_state, chars, charslen, + utf8_len); } if (renderer_context.nowrap - && part->cx + charslen > overlap(par_format)) + && part->cx + utf8_len > overlap(par_format)) return; - part->cx += charslen; + part->cx += utf8_len; renderer_context.nobreak = 0; if (!html_is_preformatted()) { @@ -1392,7 +1441,7 @@ } assert(charslen > 0); - part->xa += charslen; + part->xa += utf8_len; int_lower_bound(&part->max_width, part->xa + par_format.leftmargin + par_format.rightmargin - (chars[charslen - 1] == ' ' @@ -1956,6 +2005,8 @@ &document->cp, &document->cp_status, document->options.hard_assume); + html_context->options->utf8 = + codepages[document->options.cp & ~SYSTEM_CHARSET_FLAG].table == table_utf_8; if (title.length) { document->title = convert_string(renderer_context.convert_table, diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/document/options.h elinks.2/src/document/options.h --- elinks/src/document/options.h 2005-12-02 10:57:10.762781750 +0100 +++ elinks.2/src/document/options.h 2005-12-22 17:54:00.867032250 +0100 @@ -71,6 +71,7 @@ unsigned int plain:1; unsigned int wrap:1; + unsigned int utf8:1; /* XXX: Everything past this comment is specialy handled by compare_opt() */ unsigned char *framename; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/document/plain/renderer.c elinks.2/src/document/plain/renderer.c --- elinks/src/document/plain/renderer.c 2005-12-02 10:57:10.766782000 +0100 +++ elinks.2/src/document/plain/renderer.c 2005-12-22 17:54:00.875032750 +0100 @@ -65,6 +65,7 @@ if (!line) return NULL; if (x != line->length) { + if (!ALIGN_LINE(&line->chars, line->length, x)) return NULL; @@ -230,8 +231,10 @@ struct screen_char *template = &renderer->template; struct screen_char saved_renderer_template = *template; struct screen_char *pos, *startpos; + unsigned char *end, *text; int lineno = renderer->lineno; int expanded = 0; + int utf8 = document->options.utf8; int width = line_width; int line_pos; @@ -272,6 +275,7 @@ 
assert(expanded >= 0); + if (utf8) goto utf_8; startpos = pos = realloc_line(document, width + expanded, lineno); if (!pos) { mem_free(line); @@ -399,7 +403,139 @@ *template = saved_renderer_template; } } + goto end; +utf_8: + end = line + width; + startpos = pos = realloc_line(document, width + expanded, lineno); + if (!pos) { + mem_free(line); + return 0; + } + + expanded = 0; + for (text = line; text < end; ) { + unsigned char line_char = *text; + unsigned char next_char, prev_char; + + line_pos = text - line; + prev_char = text > line ? *(text - 1) : '\0'; + next_char = (text + 1 < end) ? *(text + 1) : '\0'; + + /* Do not expand tabs that precede back-spaces; this saves the + * back-space code some trouble. */ + if (line_char == ASCII_TAB && next_char != ASCII_BS) { + int tab_width = 7 - ((line_pos + expanded) & 7); + + expanded += tab_width; + + template->data = ' '; + do + copy_screen_chars(pos++, template, 1); + while (tab_width--); + + *template = saved_renderer_template; + text++; + } else if (line_char == ASCII_BS) { + if (!(expanded + line_pos)) { + /* We've backspaced to the start of the line */ + if (expanded > 0) + expanded--; /* Don't count it */ + continue; + } + + if (pos > startpos) + pos--; /* Backspace */ + + /* Handle x^H_ as _^Hx, but prevent an infinite loop + * swapping two underscores. */ + if (next_char == '_' && prev_char != '_') { + /* x^H_ becomes _^Hx */ + if (text - 1 >= line) + *(text - 1) = next_char; + if (text + 1 < end) + *(text + 1) = prev_char; + + /* Go back and reparse the swapped characters */ + if (text - 2 >= line) + text -= 2; + continue; + } + + if (expanded - 2 >= 0) { + /* Don't count the backspace character or the + * deleted character when returning the line's + * width or when expanding tabs. */ + expanded -= 2; + } + + if (pos->data == '_' && next_char == '_') { + /* Is _^H_ an underlined underscore + * or an emboldened underscore? 
*/ + + if (expanded + line_pos >= 0 + && pos - 1 >= startpos + && (pos - 1)->attr) { + /* There is some preceding text, + * and it has an attribute; copy it */ + template->attr |= (pos - 1)->attr; + } else { + /* Default to bold; seems more useful + * than underlining the underscore */ + template->attr |= SCREEN_ATTR_BOLD; + } + + } else if (pos->data == '_') { + /* Underline _^Hx */ + + template->attr |= SCREEN_ATTR_UNDERLINE; + + } else if (pos->data == next_char) { + /* Embolden x^Hx */ + + template->attr |= SCREEN_ATTR_BOLD; + } + + /* Handle _^Hx^Hx as both bold and underlined */ + if (template->attr) + template->attr |= pos->attr; + text++; + } else { + int added_chars = 0; + + if (document->options.plain_display_links + && isalpha(line_char) && isalpha(next_char)) { + /* We only want to check for a URI if there are + * at least two consecutive alphabetic + * characters, or if we are at the very start of + * the line. It improves performance a bit. + * --Zas */ + added_chars = print_document_link(renderer, + lineno, line, + line_pos, + width, + expanded, + pos); + } + if (added_chars) { + text += added_chars; + pos += added_chars; + } else { + unicode_val_T data = utf_8_to_unicode(&text, end); + + if (data == UCS_NO_CHAR) text++; + template->data = (uint16_t)data; + copy_screen_chars(pos++, template, 1); + + /* Detect copy of nul chars to screen, this + * should not occur. 
--Zas */ + assert(line_char); + } + + *template = saved_renderer_template; + } + } +end: mem_free(line); realloc_line(document, pos - startpos, lineno); @@ -552,6 +688,8 @@ document->bgcolor = document->options.default_bg; document->width = 0; + document->options.utf8 = + (codepages[document->options.cp & ~SYSTEM_CHARSET_FLAG].table == table_utf_8); /* Setup the style */ init_template(&renderer.template, &document->options); diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/intl/charsets.c elinks.2/src/intl/charsets.c --- elinks/src/intl/charsets.c 2005-12-02 10:57:10.818785250 +0100 +++ elinks.2/src/intl/charsets.c 2005-12-22 17:54:00.883033250 +0100 @@ -23,18 +23,9 @@ /* Fix namespace clash on MacOS. */ +#if 0 #define table table_elinks - -struct table_entry { - unsigned char c; - unicode_val_T u; -}; - -struct codepage_desc { - unsigned char *name; - unsigned char **aliases; - struct table_entry *table; -}; +#endif #include "intl/codepage.inc" #include "intl/uni_7b.inc" @@ -108,19 +99,19 @@ } } -#define BIN_SEARCH(table, entry, entries, key, result) \ +#define BIN_SEARCH(table_elinks, entry, entries, key, result) \ { \ long _s = 0, _e = (entries) - 1; \ \ while (_s <= _e || !((result) = -1)) { \ long _m = (_s + _e) / 2; \ \ - if ((table)[_m].entry == (key)) { \ + if ((table_elinks)[_m].entry == (key)) { \ (result) = _m; \ break; \ } \ - if ((table)[_m].entry > (key)) _e = _m - 1; \ - if ((table)[_m].entry < (key)) _s = _m + 1; \ + if ((table_elinks)[_m].entry > (key)) _e = _m - 1; \ + if ((table_elinks)[_m].entry < (key)) _s = _m + 1; \ } \ } \ @@ -131,8 +122,6 @@ 0x007e, 0x2122, 0x0161, 0x003e, 0x0153, 0x0000, 0x0000, 0x0000, }; -#define SYSTEM_CHARSET_FLAG 128 - unsigned char * u2cp_(unicode_val_T u, int to, int no_nbsp_hack) { @@ -140,6 +129,12 @@ int s; if (u < 128) return strings[u]; + + to &= ~SYSTEM_CHARSET_FLAG; + + if (codepages[to].table == table_utf_8) + return encode_utf_8(u); + /* To mark non breaking spaces, we use a special char NBSP_CHAR. 
*/ if (u == 0xa0) return no_nbsp_hack ? " " : NBSP_CHAR_STRING; if (u == 0xad) return ""; @@ -151,7 +146,6 @@ return u2cp_(strange, to, no_nbsp_hack); } - to &= ~SYSTEM_CHARSET_FLAG; for (j = 0; codepages[to].table[j].c; j++) if (codepages[to].table[j].u == u) @@ -165,7 +159,7 @@ static unsigned char utf_buffer[7]; -static unsigned char * +inline unsigned char * encode_utf_8(unicode_val_T u) { memset(utf_buffer, 0, 7); @@ -200,6 +194,91 @@ return utf_buffer; } +inline int +strlen_utf8(unsigned char **str) +{ + unsigned char *s = *str; + unsigned char *end = strchr(s, '\0'); + int x; + int len; + + for (x = 0;; x++, s += len) { + if (*s < 0x80) len = 1; + else if (*s < 0xe0) len = 2; + else if (*s < 0xf0) len = 3; + else if (*s < 0xf8) len = 4; + else if (*s < 0xfc) len = 5; + else len = 6; + if (s + len > end) break; + } + *str = s; + return x; +} + +inline unicode_val_T +utf_8_to_unicode(unsigned char **string, unsigned char *end) +{ + unsigned char *str = *string; + unicode_val_T u; + int length; + + if (str[0] < 0x80) + length = 1; + else if (str[0] < 0xe0) + length = 2; + else if (str[0] < 0xf0) + length = 3; + else if (str[0] < 0xf8) + length = 4; + else if (str[0] < 0xfc) + length = 5; + else + length = 6; + + if (str + length > end) { + return UCS_NO_CHAR; + } + + switch (length) { + case 1: + u = str[0]; + break; + case 2: + u = (str[0] & 0x1f) << 6; + u += (str[1] & 0x3f); + break; + case 3: + u = (str[0] & 0x0f) << 12; + u += ((str[1] & 0x3f) << 6); + u += (str[2] & 0x3f); + break; + case 4: + u = (str[0] & 0x0f) << 18; + u += ((str[1] & 0x3f) << 12); + u += ((str[2] & 0x3f) << 6); + u += (str[3] & 0x3f); + break; + case 5: + u = (str[0] & 0x0f) << 24; + u += ((str[1] & 0x3f) << 18); + u += ((str[2] & 0x3f) << 12); + u += ((str[3] & 0x3f) << 6); + u += (str[4] & 0x3f); + break; + case 6: + default: + u = (str[0] & 0x01) << 30; + u += ((str[1] & 0x3f) << 24); + u += ((str[2] & 0x3f) << 18); + u += ((str[3] & 0x3f) << 12); + u += ((str[4] & 0x3f) << 6); + u 
+= (str[5] & 0x3f); + break; + } + *string = str + length; + return u > 0xffff ? '*' : u; +} + /* This slow and ugly code is used by the terminal utf_8_io */ unsigned char * cp2utf_8(int from, int c) @@ -302,7 +381,7 @@ return utf_table; } -struct conv_table table[256]; +struct conv_table table_elinks[256]; static int first = 1; void @@ -310,10 +389,10 @@ { if (!utf_table_init) free_utf_table(); if (first) { - memset(table, 0, sizeof(table)); + memset(table_elinks, 0, sizeof(table_elinks)); first = 0; } - new_translation_table(table); + new_translation_table(table_elinks); } @@ -326,7 +405,7 @@ from &= ~SYSTEM_CHARSET_FLAG; to &= ~SYSTEM_CHARSET_FLAG; if (first) { - memset(table, 0, sizeof(table)); + memset(table_elinks, 0, sizeof(table_elinks)); first = 0; } if (/*from == to ||*/ from == -1 || to == -1) @@ -334,21 +413,21 @@ if (codepages[to].table == table_utf_8) return get_translation_table_to_utf_8(from); if (from == lfr && to == lto) - return table; + return table_elinks; lfr = from; lto = to; - new_translation_table(table); + new_translation_table(table_elinks); if (codepages[from].table == table_utf_8) { int i; for (i = 0; codepages[to].table[i].c; i++) - add_utf_8(table, codepages[to].table[i].u, + add_utf_8(table_elinks, codepages[to].table[i].u, strings[codepages[to].table[i].c]); for (i = 0; unicode_7b[i].x != -1; i++) if (unicode_7b[i].x >= 0x80) - add_utf_8(table, unicode_7b[i].x, + add_utf_8(table_elinks, unicode_7b[i].x, unicode_7b[i].s); } else { @@ -362,14 +441,14 @@ unsigned char *u; u = u2cp(codepages[from].table[j].u, to); - if (u) table[i].u.str = u; + if (u) table_elinks[i].u.str = u; break; } } } } - return table; + return table_elinks; } static inline int @@ -430,11 +509,16 @@ static struct entity_cache entity_cache[ENTITY_CACHE_MAXLEN][ENTITY_CACHE_SIZE]; static unsigned int nb_entity_cache[ENTITY_CACHE_MAXLEN]; static int first_time = 1; - unsigned int slen; + unsigned int slen = 0; unsigned char *result = NULL; if (strlen <= 0) return 
NULL; + /* TODO: caching UTF-8 */ + encoding &= ~SYSTEM_CHARSET_FLAG; + if (codepages[encoding].table == table_utf_8) + goto skip; + if (first_time) { memset(&nb_entity_cache, 0, ENTITY_CACHE_MAXLEN * sizeof(unsigned int)); first_time = 0; @@ -488,7 +572,7 @@ fprintf(stderr, "miss\n"); #endif } - +skip: if (*str == '#') { /* Numeric entity. */ int l = (int) strlen; unsigned char *st = (unsigned char *) str; @@ -540,6 +624,9 @@ if (element) result = u2cp(element->c, encoding); } + if (codepages[encoding].table == table_utf_8) { + return result; + } end: /* Take care of potential buffer overflow. */ if (strlen < sizeof(entity_cache[slen][0].str)) { diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/intl/charsets.h elinks.2/src/intl/charsets.h --- elinks/src/intl/charsets.h 2005-12-02 10:57:10.818785250 +0100 +++ elinks.2/src/intl/charsets.h 2005-12-22 17:54:00.891033750 +0100 @@ -18,6 +18,20 @@ } u; }; +struct table_entry { + unsigned char c; + unicode_val_T u; +}; + +struct codepage_desc { + unsigned char *name; + unsigned char **aliases; + struct table_entry *table; +}; + +extern struct codepage_desc codepages[]; +extern struct table_entry table_utf_8[]; + enum convert_string_mode { CSM_DEFAULT, /* Convert any char. */ CSM_QUERY, /* Special handling of '&' and '=' chars. */ @@ -36,6 +50,8 @@ #undef convert_string #define convert_string convert_string_elinks +#define SYSTEM_CHARSET_FLAG 128 + /* This routine converts a string from one charset to another according to the * passed @convert_table, potentially also decoding SGML (HTML) entities along * the way (according to @mode). 
It either returns dynamically allocated @@ -53,6 +69,10 @@ unsigned char *get_cp_mime_name(int); int is_cp_special(int); void free_conv_table(void); +inline unsigned char *encode_utf_8(unicode_val_T); +inline int strlen_utf8(unsigned char **); +inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *); + unsigned char *cp2utf_8(int, int); unsigned char *u2cp_(unicode_val_T, int, int no_nbsp_hack); diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/terminal/draw.c elinks.2/src/terminal/draw.c --- elinks/src/terminal/draw.c 2005-12-02 10:57:10.994796250 +0100 +++ elinks.2/src/terminal/draw.c 2005-12-22 17:54:00.899034250 +0100 @@ -102,7 +102,7 @@ } void -draw_char_data(struct terminal *term, int x, int y, unsigned char data) +draw_char_data(struct terminal *term, int x, int y, uint16_t data) { struct screen_char *screen_char = get_char(term, x, y); @@ -200,7 +200,7 @@ void draw_char(struct terminal *term, int x, int y, - unsigned char data, enum screen_char_attr attr, + uint16_t data, enum screen_char_attr attr, struct color_pair *color) { struct screen_char *screen_char = get_char(term, x, y); @@ -277,6 +277,41 @@ draw_box(term, &dbox, ' ', 0, color); } +static void +draw_text_utf8(struct terminal *term, int x, int y, + unsigned char *text, int length, + enum screen_char_attr attr, struct color_pair *color) +{ + struct screen_char *start, *pos; + unsigned char *end = text + length; + unicode_val_T data; + + assert(text && length >= 0); + if_assert_failed return; + + if (length <= 0) return; + if (x >= term->width) return; + + data = utf_8_to_unicode(&text, end); + if (data == UCS_NO_CHAR) return; + start = get_char(term, x++, y); + start->data = (uint16_t)data; + if (color) { + start->attr = attr; + set_term_color(start, color, 0, + get_opt_int_tree(term->spec, "colors")); + } + + for (pos = start + 1; x < term->width; x++, pos++) { + data = utf_8_to_unicode(&text, end); + if (data == UCS_NO_CHAR) break; + if (color) copy_screen_chars(pos, start, 1); 
+ pos->data = (uint16_t)data; + } + set_screen_dirty(term->screen, y, y); + +} + void draw_text(struct terminal *term, int x, int y, unsigned char *text, int length, @@ -288,6 +323,11 @@ assert(text && length >= 0); if_assert_failed return; + if (term->utf8) { + draw_text_utf8(term, x, y, text, length, attr, color); + return; + } + if (length <= 0) return; pos = get_char(term, x, y); if (!pos) return; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/terminal/draw.h elinks.2/src/terminal/draw.h --- elinks/src/terminal/draw.h 2005-12-02 10:57:10.994796250 +0100 +++ elinks.2/src/terminal/draw.h 2005-12-22 17:54:00.907034750 +0100 @@ -1,6 +1,8 @@ #ifndef EL__TERMINAL_DRAW_H #define EL__TERMINAL_DRAW_H +#include "intl/charsets.h" + struct color_pair; struct box; struct terminal; @@ -19,7 +21,7 @@ /* One position in the terminal screen's image. */ struct screen_char { /* Contains either character value or frame data. */ - unsigned char data; + uint16_t data; /* Attributes are screen_char_attr bits. */ unsigned char attr; @@ -202,7 +204,7 @@ struct color_pair *color); /* Sets the data of a screen position. */ -void draw_char_data(struct terminal *term, int x, int y, unsigned char data); +void draw_char_data(struct terminal *term, int x, int y, uint16_t data); /* Sets the data to @border and of a screen position. */ void draw_border_char(struct terminal *term, int x, int y, @@ -214,7 +216,7 @@ /* Draws a char. */ void draw_char(struct terminal *term, int x, int y, - unsigned char data, enum screen_char_attr attr, + uint16_t data, enum screen_char_attr attr, struct color_pair *color); /* Draws area defined by @box using the same colors and attributes. 
*/ diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/terminal/event.c elinks.2/src/terminal/event.c --- elinks/src/terminal/event.c 2005-12-22 17:44:54.572891000 +0100 +++ elinks.2/src/terminal/event.c 2005-12-22 17:58:15.430941500 +0100 @@ -131,7 +131,11 @@ { unsigned char *recoded; - recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset")); + if (term->utf8) + recoded = encode_utf_8(u); + else + recoded = u2cp_no_nbsp(u, get_opt_codepage_tree(term->spec, "charset")); + if (!recoded) recoded = "*"; while (*recoded) { ev->info.keyboard.key = *recoded; @@ -245,7 +249,6 @@ case EVENT_KBD: { - int utf8_io = -1; int key = get_kbd_key(ev); reset_timer(); @@ -260,9 +263,7 @@ } if (interlink->utf_8.len) { - utf8_io = get_opt_bool_tree(term->spec, "utf_8_io"); - - if ((key & 0xC0) == 0x80 && utf8_io) { + if ((key & 0xC0) == 0x80 && term->utf8) { interlink->utf_8.ucs <<= 6; interlink->utf_8.ucs |= key & 0x3F; if (! --interlink->utf_8.len) { @@ -280,15 +281,14 @@ } } - if (key < 0x80 || key > 0xFF - || (utf8_io == -1 - ? 
!get_opt_bool_tree(term->spec, "utf_8_io") - : !utf8_io)) { + if (key < 0x80 || key > 0xFF || !term->utf8) { term_send_event(term, ev); break; - } else if ((key & 0xC0) == 0xC0 && (key & 0xFE) != 0xFE) { + } + + else if ((key & 0xC0) == 0xC0 && (key & 0xFE) != 0xFE) { unsigned int mask, cov = 0x80; int len = 0; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/terminal/event.c.rej elinks.2/src/terminal/event.c.rej --- elinks/src/terminal/event.c.rej 1970-01-01 01:00:00.000000000 +0100 +++ elinks.2/src/terminal/event.c.rej 2005-12-22 17:54:00.923035750 +0100 @@ -0,0 +1,20 @@ +*************** +*** 131,137 **** + { + unsigned char *recoded; + +- recoded = u2cp_no_nbsp(u, get_opt_int_tree(term->spec, "charset")); + if (!recoded) recoded = "*"; + while (*recoded) { + ev->info.keyboard.key = *recoded; +--- 131,140 ---- + { + unsigned char *recoded; + ++ if (term->utf8) ++ recoded = encode_utf_8(u); ++ else ++ recoded = u2cp_no_nbsp(u, get_opt_int_tree(term->spec, "charset")); + if (!recoded) recoded = "*"; + while (*recoded) { + ev->info.keyboard.key = *recoded; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/terminal/screen.c elinks.2/src/terminal/screen.c --- elinks/src/terminal/screen.c 2005-12-02 10:57:10.998796500 +0100 +++ elinks.2/src/terminal/screen.c 2005-12-22 17:54:00.963038250 +0100 @@ -30,6 +30,7 @@ unsigned char frame_dumb[48] = " ||||++||++++++--|-+||++--|-+----++++++++ "; static unsigned char frame_vt100[48] = "aaaxuuukkuxkjjjkmvwtqnttmlvwtqnvvwwmmllnnjla "; +#if 0 /* For UTF8 I/O */ static unsigned char frame_vt100_u[48] = { 177, 177, 177, 179, 180, 180, 180, 191, @@ -39,6 +40,7 @@ 193, 194, 194, 192, 192, 218, 218, 197, 197, 217, 218, 177, 32, 32, 32, 32 }; +#endif static unsigned char frame_freebsd[48] = { 130, 138, 128, 153, 150, 150, 150, 140, @@ -78,6 +80,11 @@ /* begin border: */ TERM_STRING("\033[11m"), }; +static struct string utf8_linux_frame_seqs[] = { + /* end border: */ TERM_STRING("\033[10m\033%G"), + /* begin border: */ 
TERM_STRING("\033%@\033[11m"), +}; + static struct string vt100_frame_seqs[] = { /* end border: */ TERM_STRING("\x0f"), /* begin border: */ TERM_STRING("\x0e"), @@ -99,11 +106,6 @@ * uniquely identify the screen_driver. */ enum term_mode_type type; - /* Charsets when doing UTF8 I/O. */ - /* [0] is the common charset and [1] is the frame charset. - * Test wether to use UTF8 I/O using the use_utf8_io() macro. */ - int charsets[2]; - /* The frame translation table. May be NULL. */ unsigned char *frame; @@ -119,6 +121,9 @@ /* These are directly derived from the terminal options. */ unsigned int transparent:1; + /* UTF-8 I/O */ + unsigned int utf8:1; + /* The terminal._template_ name. */ unsigned char name[1]; /* XXX: Keep last! */ }; @@ -126,56 +131,56 @@ static struct screen_driver dumb_screen_driver = { NULL_LIST_HEAD, /* type: */ TERM_DUMB, - /* charsets: */ { -1, -1 }, /* No UTF8 I/O */ /* frame: */ frame_dumb, /* frame_seqs: */ NULL, /* underline: */ underline_seqs, /* color_mode: */ COLOR_MODE_16, /* transparent: */ 1, + /* utf-8: */ 0, }; static struct screen_driver vt100_screen_driver = { NULL_LIST_HEAD, /* type: */ TERM_VT100, - /* charsets: */ { -1, -1 }, /* No UTF8 I/O */ /* frame: */ frame_vt100, /* frame_seqs: */ vt100_frame_seqs, /* underline: */ underline_seqs, /* color_mode: */ COLOR_MODE_16, /* transparent: */ 1, + /* utf-8: */ 0, }; static struct screen_driver linux_screen_driver = { NULL_LIST_HEAD, /* type: */ TERM_LINUX, - /* charsets: */ { -1, -1 }, /* No UTF8 I/O */ /* frame: */ NULL, /* No restrict_852 */ /* frame_seqs: */ NULL, /* No m11_hack */ /* underline: */ underline_seqs, /* color_mode: */ COLOR_MODE_16, /* transparent: */ 1, + /* utf-8: */ 0, }; static struct screen_driver koi8_screen_driver = { NULL_LIST_HEAD, /* type: */ TERM_KOI8, - /* charsets: */ { -1, -1 }, /* No UTF8 I/O */ /* frame: */ frame_koi, /* frame_seqs: */ NULL, /* underline: */ underline_seqs, /* color_mode: */ COLOR_MODE_16, /* transparent: */ 1, + /* utf-8: */ 0, }; 
static struct screen_driver freebsd_screen_driver = { NULL_LIST_HEAD, /* type: */ TERM_FREEBSD, - /* charsets: */ { -1, -1 }, /* No UTF8 I/O */ /* frame: */ frame_freebsd, /* frame_seqs: */ NULL, /* No m11_hack */ /* underline: */ underline_seqs, /* color_mode: */ COLOR_MODE_16, /* transparent: */ 1, + /* utf-8: */ 0, }; /* XXX: Keep in sync with enum term_mode_type. */ @@ -187,13 +192,14 @@ /* TERM_FREEBSD: */ &freebsd_screen_driver, }; +#define use_utf8_io(driver) ((driver)->utf8) static INIT_LIST_HEAD(active_screen_drivers); static void update_screen_driver(struct screen_driver *driver, struct option *term_spec) { - int utf8_io = get_opt_bool_tree(term_spec, "utf_8_io"); + driver->utf8 = get_opt_bool_tree(term_spec, "utf_8_io"); driver->color_mode = get_opt_int_tree(term_spec, "colors"); driver->transparent = get_opt_bool_tree(term_spec, "transparency"); @@ -204,44 +210,22 @@ driver->underline = NULL; } - if (utf8_io) { - driver->charsets[0] = get_opt_int_tree(term_spec, "charset"); - if (driver->type == TERM_LINUX) { - if (get_opt_bool_tree(term_spec, "restrict_852")) - driver->frame = frame_restrict; - - driver->charsets[1] = get_cp_index("cp437"); - - } else if (driver->type == TERM_FREEBSD) { - driver->charsets[1] = get_cp_index("cp437"); - - } else if (driver->type == TERM_VT100) { - driver->frame = frame_vt100_u; - driver->charsets[1] = get_cp_index("cp437"); - - } else if (driver->type == TERM_KOI8) { - driver->charsets[1] = get_cp_index("koi8-r"); + if (driver->type == TERM_LINUX) { + if (get_opt_bool_tree(term_spec, "restrict_852")) + driver->frame = frame_restrict; + + if (get_opt_bool_tree(term_spec, "m11_hack")) + driver->frame_seqs = m11_hack_frame_seqs; + + if (driver->utf8) + driver->frame_seqs = utf8_linux_frame_seqs; + + } else if (driver->type == TERM_FREEBSD) { + if (get_opt_bool_tree(term_spec, "m11_hack")) + driver->frame_seqs = m11_hack_frame_seqs; - } else { - driver->charsets[1] = driver->charsets[0]; - } - - } else { - 
driver->charsets[0] = -1; - if (driver->type == TERM_LINUX) { - if (get_opt_bool_tree(term_spec, "restrict_852")) - driver->frame = frame_restrict; - - if (get_opt_bool_tree(term_spec, "m11_hack")) - driver->frame_seqs = m11_hack_frame_seqs; - - } else if (driver->type == TERM_FREEBSD) { - if (get_opt_bool_tree(term_spec, "m11_hack")) - driver->frame_seqs = m11_hack_frame_seqs; - - } else if (driver->type == TERM_VT100) { - driver->frame = frame_vt100; - } + } else if (driver->type == TERM_VT100) { + driver->frame = frame_vt100; } } @@ -263,6 +247,7 @@ return 0; } + static inline struct screen_driver * add_screen_driver(enum term_mode_type type, struct terminal *term, int env_len) { @@ -281,6 +266,8 @@ term->spec->change_hook = screen_driver_change_hook; + term->utf8 = use_utf8_io(driver); + return driver; } @@ -299,6 +286,7 @@ /* Some simple probably useless MRU ;) */ move_to_top_of_list(active_screen_drivers, driver); + term->utf8 = use_utf8_io(driver); return driver; } @@ -364,11 +352,10 @@ #define compare_bg_color(a, b) (TERM_COLOR_BACKGROUND(a) == TERM_COLOR_BACKGROUND(b)) #define compare_fg_color(a, b) (TERM_COLOR_FOREGROUND(a) == TERM_COLOR_FOREGROUND(b)) -#define use_utf8_io(driver) ((driver)->charsets[0] != -1) static inline void add_char_data(struct string *screen, struct screen_driver *driver, - unsigned char data, unsigned char border) + unicode_val_T data, unsigned char border) { if (!isscreensafe(data)) { add_char_to_string(screen, ' '); @@ -379,13 +366,15 @@ data = driver->frame[data - 176]; if (use_utf8_io(driver)) { - int charset = driver->charsets[!!border]; - - add_to_string(screen, cp2utf_8(charset, data)); + if (border) + add_char_to_string(screen, (unsigned char)data); + else + if (data != UCS_NO_CHAR) + add_to_string(screen, encode_utf_8(data)); return; } - add_char_to_string(screen, data); + add_char_to_string(screen, (unsigned char)data); } /* Time critical section. 
*/ diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/terminal/terminal.h elinks.2/src/terminal/terminal.h --- elinks/src/terminal/terminal.h 2005-12-02 10:57:11.022798000 +0100 +++ elinks.2/src/terminal/terminal.h 2005-12-22 17:54:00.971038750 +0100 @@ -110,6 +110,9 @@ * work and even maintaining these structures ;-). */ unsigned int master:1; + /* Indicates whether UTF-8 I/O is used */ + unsigned int utf8:1; + /* The current tab number. */ int current_tab; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/viewer/dump/dump.c elinks.2/src/viewer/dump/dump.c --- elinks/src/viewer/dump/dump.c 2005-12-02 10:57:11.038799000 +0100 +++ elinks.2/src/viewer/dump/dump.c 2005-12-22 17:54:00.979039250 +0100 @@ -26,6 +26,7 @@ #include "document/options.h" #include "document/renderer.h" #include "document/view.h" +#include "intl/charsets.h" #include "intl/gettext/libintl.h" #include "main/select.h" #include "main/main.h" @@ -319,10 +320,15 @@ add_document_to_string(struct string *string, struct document *document) { int y; + int utf8; assert(string && document); if_assert_failed return NULL; + utf8 = (codepages[document->options.cp & ~SYSTEM_CHARSET_FLAG].table + == table_utf_8); + + if (utf8) goto utf_8; for (y = 0; y < document->height; y++) { struct screen_char *pos = document->data[y].chars; int white = 0; @@ -354,7 +360,43 @@ add_char_to_string(string, '\n'); } + goto end; +utf_8: + for (y = 0; y < document->height; y++) { + struct screen_char *pos = document->data[y].chars; + int white = 0; + int x; + + for (x = 0; x < document->data[y].length; x++) { + uint16_t data = pos->data; + unsigned int frame = (pos->attr & SCREEN_ATTR_FRAME); + + if (!isscreensafe(data)) { + white++; + continue; + } else if (frame && data >= 176 && data < 224) { + data = frame_dumb[data - 176]; + if (data <= ' ') { + /* Count spaces. */ + white++; + } else { + /* Print spaces if any. 
*/ + if (white) { + add_xchar_to_string(string, ' ', white); + white = 0; + } + if (frame) + add_char_to_string(string, data); + else + add_to_string(string, encode_utf_8(data)); + } + } + } + + add_char_to_string(string, '\n'); + } +end: return string; } @@ -378,10 +420,14 @@ { int y; int bptr = 0; + int utf8 = (codepages[document->options.cp & ~SYSTEM_CHARSET_FLAG].table + == table_utf_8); + unsigned char *buf = mem_alloc(D_BUF); if (!buf) return -1; + if (utf8) goto utf_8; for (y = 0; y < document->height; y++) { int white = 0; int x; @@ -418,13 +464,60 @@ if (write_char('\n', fd, buf, &bptr)) goto fail; } + goto ref; +utf_8: + for (y = 0; y < document->height; y++) { + int white = 0; + int x; + + for (x = 0; x < document->data[y].length; x++) { + uint16_t c; + unsigned char attr = document->data[y].chars[x].attr; + + c = document->data[y].chars[x].data; + + if ((attr & SCREEN_ATTR_FRAME) + && c >= 176 && c < 224) + c = frame_dumb[c - 176]; + else { + unsigned char *utf8_buf = encode_utf_8(c); + + while (*utf8_buf) { + if (write_char(*utf8_buf++, + fd, buf, &bptr)) goto fail; + } + continue; + } + + if (c <= ' ') { + /* Count spaces. */ + white++; + continue; + } + + /* Print spaces if any. */ + while (white) { + if (write_char(' ', fd, buf, &bptr)) + goto fail; + white--; + } + + /* Print normal char. */ + if (write_char(c, fd, buf, &bptr)) + goto fail; + } + + /* Print end of line. 
*/ + if (write_char('\n', fd, buf, &bptr)) + goto fail; + } if (hard_write(fd, buf, bptr) != bptr) { fail: mem_free(buf); return -1; } - +ref: if (document->nlinks && get_opt_bool("document.dump.references")) { int x; unsigned char *header = "\nReferences\n\n Visible links\n"; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/viewer/text/form.c elinks.2/src/viewer/text/form.c --- elinks/src/viewer/text/form.c 2005-12-14 18:50:58.246122000 +0100 +++ elinks.2/src/viewer/text/form.c 2005-12-22 17:54:00.987039750 +0100 @@ -158,16 +158,22 @@ mem_free_set(&fs->value, NULL); switch (fc->type) { + unsigned char *text; + case FC_TEXT: case FC_PASSWORD: case FC_TEXTAREA: fs->value = stracpy(fc->default_value); fs->state = strlen(fc->default_value); + text = fs->value; + if (fc->type != FC_TEXTAREA) + fs->utf8_pos = strlen_utf8(&text); fs->vpos = 0; break; case FC_FILE: fs->value = stracpy(""); fs->state = 0; + fs->utf8_pos = 0; fs->vpos = 0; break; case FC_SELECT: @@ -332,12 +338,14 @@ dy = box->y - vs->y; switch (fc->type) { unsigned char *s; + unsigned char *text, *end; int len; int i, x, y; case FC_TEXT: case FC_PASSWORD: case FC_FILE: + if (term->utf8) goto utf_8; int_bounds(&fs->vpos, fs->state - fc->size + 1, fs->state); if (!link->npoints) break; @@ -362,6 +370,36 @@ draw_char_data(term, x, y, data); } break; +utf_8: + text = fs->value; + end = strchr(text, '\0'); + int_bounds(&fs->vpos, fs->utf8_pos - fc->size + 1, fs->utf8_pos); + if (!link->npoints) break; + + y = link->points[0].y + dy; + if (!row_is_in_box(box, y)) + break; + for (i = 0; i < fs->vpos; i++) { + utf_8_to_unicode(&text, end); + } + s = text; + len = strlen_utf8(&s); + x = link->points[0].x + dx; + + for (i = 0; i < fc->size; i++, x++) { + uint16_t data; + + if (!col_is_in_box(box, x)) continue; + + if (fs->value && i >= -fs->vpos && i < len) + data = fc->type != FC_PASSWORD + ? 
utf_8_to_unicode(&text, end) : '*'; + else + data = '_'; + + draw_char_data(term, x, y, data); + } + break; case FC_TEXTAREA: draw_textarea(term, fs, doc_view, link); break; @@ -380,6 +418,7 @@ else /* XXX: when can this happen? --pasky */ s = ""; + if (term->utf8) goto utf_8_select; len = s ? strlen(s) : 0; for (i = 0; i < link->npoints; i++) { x = link->points[i].x + dx; @@ -388,6 +427,18 @@ draw_char_data(term, x, y, i < len ? s[i] : '_'); } break; +utf_8_select: + text = s; + end = strchr(s, '\0'); + len = strlen_utf8(&text); + for (i = 0; i < link->npoints; i++) { + x = link->points[i].x + dx; + y = link->points[i].y + dy; + if (is_in_box(box, x, y)) + draw_char_data(term, x, y, i < len + ? utf_8_to_unicode(&s, end) : '_'); + } + break; case FC_SUBMIT: case FC_IMAGE: case FC_RESET: @@ -1197,6 +1248,7 @@ unsigned char *text; int length; enum frame_event_status status = FRAME_EVENT_REFRESH; + int utf8 = ses->tab->term->utf8; assert(ses && doc_view && link && ev); if_assert_failed return FRAME_EVENT_OK; @@ -1216,49 +1268,79 @@ switch (action_id) { case ACT_EDIT_LEFT: - fs->state = int_max(fs->state - 1, 0); + if (utf8) { + unsigned char *text = fs->value; + unsigned char *end = fs->value + fs->state - 1; + int old = fs->state; + + while (utf_8_to_unicode(&text, end) != UCS_NO_CHAR); + fs->state = (int)(text - fs->value); + if (old != fs->state) fs->utf8_pos--; + } else + fs->state = int_max(fs->state - 1, 0); break; case ACT_EDIT_RIGHT: - fs->state = int_min(fs->state + 1, strlen(fs->value)); + if (utf8) { + unsigned char *text = fs->value + fs->state; + unsigned char *end = strchr(text, '\0'); + int old = fs->state; + + utf_8_to_unicode(&text, end); + fs->state = (int)(text - fs->value); + if (old != fs->state) fs->utf8_pos++; + } else + fs->state = int_min(fs->state + 1, strlen(fs->value)); break; case ACT_EDIT_HOME: if (fc->type == FC_TEXTAREA) { - status = textarea_op_home(fs, fc); + status = textarea_op_home(fs, fc, utf8); } else { fs->state = 0; + 
fs->utf8_pos = 0; } break; case ACT_EDIT_UP: if (fc->type != FC_TEXTAREA) status = FRAME_EVENT_IGNORED; else - status = textarea_op_up(fs, fc); + status = textarea_op_up(fs, fc, utf8); break; case ACT_EDIT_DOWN: if (fc->type != FC_TEXTAREA) status = FRAME_EVENT_IGNORED; else - status = textarea_op_down(fs, fc); + status = textarea_op_down(fs, fc, utf8); break; case ACT_EDIT_END: if (fc->type == FC_TEXTAREA) { - status = textarea_op_end(fs, fc); + status = textarea_op_end(fs, fc, utf8); } else { fs->state = strlen(fs->value); + if (utf8) { + unsigned char *text = fs->value; + + fs->utf8_pos = strlen_utf8(&text); + } } break; case ACT_EDIT_BEGINNING_OF_BUFFER: if (fc->type == FC_TEXTAREA) { - status = textarea_op_bob(fs, fc); + status = textarea_op_bob(fs, fc, utf8); } else { fs->state = 0; + fs->utf8_pos = 0; } break; case ACT_EDIT_END_OF_BUFFER: if (fc->type == FC_TEXTAREA) { - status = textarea_op_eob(fs, fc); + status = textarea_op_eob(fs, fc, utf8); } else { fs->state = strlen(fs->value); + if (utf8) { + unsigned char *text = fs->value; + + fs->utf8_pos = strlen_utf8(&text); + } } break; case ACT_EDIT_OPEN_EXTERNAL: @@ -1276,6 +1358,7 @@ if (!form_field_is_readonly(fc)) fs->value[0] = 0; fs->state = 0; + fs->utf8_pos = 0; break; case ACT_EDIT_PASTE_CLIPBOARD: if (form_field_is_readonly(fc)) break; @@ -1291,13 +1374,19 @@ fs->value = v; memmove(v, text, length + 1); fs->state = strlen(fs->value); + if (utf8 && fc->type != FC_TEXTAREA) { + unsigned char *text = fs->value; + + fs->utf8_pos = strlen_utf8(&text); + } + } } mem_free(text); break; case ACT_EDIT_ENTER: if (fc->type == FC_TEXTAREA) { - status = textarea_op_enter(fs, fc); + status = textarea_op_enter(fs, fc, utf8); break; } @@ -1322,7 +1411,19 @@ status = FRAME_EVENT_OK; break; } - + if (utf8) { + int i; + unsigned char *text = fs->value; + unsigned char *end = fs->value + fs->state; + + for (i = 0; i < fs->utf8_pos - 1; i++) + utf_8_to_unicode(&text, end); + length = strlen(end) + 1; + memmove(text, end, 
length); + fs->state = (int)(text - fs->value); + fs->utf8_pos--; + break; + } length = strlen(fs->value + fs->state) + 1; text = fs->value + fs->state; @@ -1340,7 +1441,18 @@ status = FRAME_EVENT_OK; break; } - + if (utf8) { + unsigned char *end = fs->value + length; + unsigned char *text = fs->value + fs->state; + unsigned char *old = text; + + utf_8_to_unicode(&text, end); + if (old != text) { + memmove(old, text, + (int)(end - text) + 1); + } + break; + } text = fs->value + fs->state; memmove(text, text + 1, length - fs->state); @@ -1370,6 +1482,11 @@ memmove(text, fs->value + fs->state, length); fs->state = (int) (text - fs->value); + if (utf8 && fc->type != FC_TEXTAREA) { + unsigned char *text = fs->value; + + fs->utf8_pos = strlen_utf8(&text); + } break; case ACT_EDIT_KILL_TO_EOL: if (form_field_is_readonly(fc)) { @@ -1423,13 +1540,41 @@ } if (form_field_is_readonly(fc) - || strlen(fs->value) >= fc->maxlength - || !insert_in_string(&fs->value, fs->state, "?", 1)) { + || strlen(fs->value) >= fc->maxlength) { status = FRAME_EVENT_OK; break; } - - fs->value[fs->state++] = get_kbd_key(ev); + if (utf8) { + static unsigned char buf[7]; + static int i = 0; + unicode_val_T data; + unsigned char *t; + + t = buf; + buf[i++] = get_kbd_key(ev); + buf[i] = 0; + data = utf_8_to_unicode(&t, buf + i); + if (data != UCS_NO_CHAR) { + if (!insert_in_string(&fs->value, fs->state, buf, i)) { + i = 0; + return FRAME_EVENT_OK; + } + fs->state += i; + fs->utf8_pos++; + i = 0; + break; + } + if (i == 6) { + i = 0; + return FRAME_EVENT_OK; + } else { + return FRAME_EVENT_OK; + } + } else { + if (!insert_in_string(&fs->value, fs->state, "?", 1)) + return FRAME_EVENT_OK; + fs->value[fs->state++] = get_kbd_key(ev); + } break; } diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/viewer/text/form.h elinks.2/src/viewer/text/form.h --- elinks/src/viewer/text/form.h 2005-12-02 10:57:11.038799000 +0100 +++ elinks.2/src/viewer/text/form.h 2005-12-22 17:54:00.991040000 +0100 @@ -39,6 +39,7 @@ 
unsigned char *value; int state; + int utf8_pos; int vpos; int vypos; diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/viewer/text/link.c elinks.2/src/viewer/text/link.c --- elinks/src/viewer/text/link.c 2005-12-14 18:50:58.246122000 +0100 +++ elinks.2/src/viewer/text/link.c 2005-12-22 17:54:00.999040500 +0100 @@ -106,6 +106,7 @@ { struct form_control *fc; struct form_state *fs; + int utf8 = doc_view->document->options.utf8; switch (link->type) { case LINK_CHECKBOX: @@ -117,12 +118,15 @@ case LINK_FIELD: fc = get_link_form_control(link); fs = find_form_state(doc_view, fc); - return fs ? fs->state - fs->vpos : 0; + if (utf8) { + return fs ? fs->utf8_pos - fs->vpos : 0; + } else + return fs ? fs->state - fs->vpos : 0; case LINK_AREA: fc = get_link_form_control(link); fs = find_form_state(doc_view, fc); - return fs ? area_cursor(fc, fs) : 0; + return fs ? area_cursor(fc, fs, utf8) : 0; case LINK_HYPERTEXT: case LINK_MAP: diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/viewer/text/textarea.c elinks.2/src/viewer/text/textarea.c --- elinks/src/viewer/text/textarea.c 2005-12-22 17:44:54.572891000 +0100 +++ elinks.2/src/viewer/text/textarea.c 2005-12-22 17:54:01.063044500 +0100 @@ -138,7 +138,7 @@ /* Fixes up the vpos and vypos members of the form_state. Returns the * logical position in the textarea view. 
*/ int -area_cursor(struct form_control *fc, struct form_state *fs) +area_cursor(struct form_control *fc, struct form_state *fs, int utf8) { struct line_info *line; int x, y; @@ -155,7 +155,15 @@ return 0; } - x = fs->state - line[y].start; + if (utf8) { + unsigned char *text = fs->value + line[y].start; + unsigned char tmp = fs->value[fs->state]; + + fs->value[fs->state] = '\0'; + x = strlen_utf8(&text); + fs->value[fs->state] = tmp; + } else + x = fs->state - line[y].start; mem_free(line); @@ -170,6 +178,85 @@ return y * fc->cols + x; } +static void +draw_textarea_utf8(struct terminal *term, struct form_state *fs, + struct document_view *doc_view, struct link *link) +{ + struct line_info *line, *linex; + struct form_control *fc; + struct box *box; + int vx, vy; + int sl, ye; + int x, y; + + assert(term && doc_view && doc_view->document && doc_view->vs && link); + if_assert_failed return; + fc = get_link_form_control(link); + assertm(fc, "link %d has no form control", (int) (link - doc_view->document->links)); + if_assert_failed return; + + box = &doc_view->box; + vx = doc_view->vs->x; + vy = doc_view->vs->y; + + if (!link->npoints) return; + area_cursor(fc, fs, 1); + linex = format_text(fs->value, fc->cols, fc->wrap, 0); + if (!linex) return; + line = linex; + sl = fs->vypos; + while (line->start != -1 && sl) sl--, line++; + + x = link->points[0].x + box->x - vx; + y = link->points[0].y + box->y - vy; + ye = y + fc->rows; + + for (; line->start != -1 && y < ye; line++, y++) { + int i; + unsigned char *text, *end; + + text = fs->value + line->start; + end = fs->value + line->end; + + for (i = 0; i < fs->vpos; i++) + utf_8_to_unicode(&text, end); + + if (!row_is_in_box(box, y)) continue; + + for (i = 0; i < fc->cols; i++) { + uint16_t data; + int xi = x + i; + + if (!col_is_in_box(box, xi)) + continue; + + if (i >= -fs->vpos + && text < end) + data = utf_8_to_unicode(&text, end); + else + data = '_'; + + draw_char_data(term, xi, y, data); + } + } + + for (; y < ye; 
y++) { + int i; + + if (!row_is_in_box(box, y)) continue; + + for (i = 0; i < fc->cols; i++) { + int xi = x + i; + + if (col_is_in_box(box, xi)) + draw_char_data(term, xi, y, '_'); + } + } + + mem_free(linex); +} + + void draw_textarea(struct terminal *term, struct form_state *fs, struct document_view *doc_view, struct link *link) @@ -183,6 +270,11 @@ assert(term && doc_view && doc_view->document && doc_view->vs && link); if_assert_failed return; + + if (term->utf8) { + draw_textarea_utf8(term, fs, doc_view, link); + return; + } fc = get_link_form_control(link); assertm(fc, "link %d has no form control", (int) (link - doc_view->document->links)); if_assert_failed return; @@ -192,7 +284,7 @@ vy = doc_view->vs->y; if (!link->npoints) return; - area_cursor(fc, fs); + area_cursor(fc, fs, 0); linex = format_text(fs->value, fc->cols, fc->wrap, 0); if (!linex) return; line = linex; @@ -448,8 +540,8 @@ } static enum frame_event_status -textarea_op(struct form_state *fs, struct form_control *fc, - int (*do_op)(struct form_state *, struct line_info *, int)) +textarea_op(struct form_state *fs, struct form_control *fc, int utf8, + int (*do_op)(struct form_state *, struct line_info *, int, int)) { struct line_info *line; int current, state; @@ -462,47 +554,87 @@ current = get_textarea_line_number(line, fs->state); state = fs->state; - if (do_op(fs, line, current)) { + if (do_op(fs, line, current, utf8)) { mem_free(line); return FRAME_EVENT_IGNORED; } mem_free(line); - return fs->state == state ? 
FRAME_EVENT_OK : FRAME_EVENT_REFRESH; } static int -do_op_home(struct form_state *fs, struct line_info *line, int current) +x_pos(struct form_state *fs, struct line_info *line, int current) +{ + unsigned char *text = fs->value + line[current].start; + unsigned char tmp = fs->value[fs->state]; + int len; + + fs->value[fs->state] = '\0'; + len = strlen_utf8(&text); + fs->value[fs->state] = tmp; + return len; +} + +static void +new_pos(struct form_state *fs, struct line_info *line, int current, int len) +{ + unsigned char *text = fs->value + line[current].start; + unsigned char *end = fs->value + line[current].end; + int i; + + for (i = 0; i < len; i++) { + unicode_val_T data = utf_8_to_unicode(&text, end); + + if (data == UCS_NO_CHAR) break; + } + fs->state = (int)(text - fs->value); +} + +static int +do_op_home(struct form_state *fs, struct line_info *line, int current, int utf8) { if (current != -1) fs->state = line[current].start; return 0; } static int -do_op_up(struct form_state *fs, struct line_info *line, int current) +do_op_up(struct form_state *fs, struct line_info *line, int current, int utf8) { if (current == -1) return 0; if (!current) return 1; + if (utf8) { + int len = x_pos(fs, line, current); + + new_pos(fs, line, current - 1, len); + return 0; + } + fs->state -= line[current].start - line[current-1].start; int_upper_bound(&fs->state, line[current-1].end); return 0; } static int -do_op_down(struct form_state *fs, struct line_info *line, int current) +do_op_down(struct form_state *fs, struct line_info *line, int current, int utf8) { if (current == -1) return 0; if (line[current+1].start == -1) return 1; + if (utf8) { + int len = x_pos(fs, line, current); + + new_pos(fs, line, current + 1, len); + return 0; + } fs->state += line[current+1].start - line[current].start; int_upper_bound(&fs->state, line[current+1].end); return 0; } static int -do_op_end(struct form_state *fs, struct line_info *line, int current) +do_op_end(struct form_state *fs, struct 
line_info *line, int current, int utf8) { if (current == -1) { fs->state = strlen(fs->value); @@ -517,7 +649,7 @@ } static int -do_op_bob(struct form_state *fs, struct line_info *line, int current) +do_op_bob(struct form_state *fs, struct line_info *line, int current, int utf8) { if (current == -1) return 0; @@ -527,7 +659,7 @@ } static int -do_op_eob(struct form_state *fs, struct line_info *line, int current) +do_op_eob(struct form_state *fs, struct line_info *line, int current, int utf8) { if (current == -1) { fs->state = strlen(fs->value); @@ -544,35 +676,35 @@ } enum frame_event_status -textarea_op_home(struct form_state *fs, struct form_control *fc) +textarea_op_home(struct form_state *fs, struct form_control *fc, int utf8) { - return textarea_op(fs, fc, do_op_home); + return textarea_op(fs, fc, utf8, do_op_home); } enum frame_event_status -textarea_op_up(struct form_state *fs, struct form_control *fc) +textarea_op_up(struct form_state *fs, struct form_control *fc, int utf8) { - return textarea_op(fs, fc, do_op_up); + return textarea_op(fs, fc, utf8, do_op_up); } enum frame_event_status -textarea_op_down(struct form_state *fs, struct form_control *fc) +textarea_op_down(struct form_state *fs, struct form_control *fc, int utf8) { - return textarea_op(fs, fc, do_op_down); + return textarea_op(fs, fc, utf8, do_op_down); } enum frame_event_status -textarea_op_end(struct form_state *fs, struct form_control *fc) +textarea_op_end(struct form_state *fs, struct form_control *fc, int utf8) { - return textarea_op(fs, fc, do_op_end); + return textarea_op(fs, fc, utf8, do_op_end); } /* Set the form state so the cursor is on the first line of the buffer. * Preserve the column if possible. 
*/ enum frame_event_status -textarea_op_bob(struct form_state *fs, struct form_control *fc) +textarea_op_bob(struct form_state *fs, struct form_control *fc, int utf8) { - return textarea_op(fs, fc, do_op_bob); + return textarea_op(fs, fc, utf8, do_op_bob); } /* Set the form state so the cursor is on the last line of the buffer. Preserve @@ -580,13 +712,13 @@ * then shifting the state by the delta of both lines start position bounding * the whole thing to the end of the last line. */ enum frame_event_status -textarea_op_eob(struct form_state *fs, struct form_control *fc) +textarea_op_eob(struct form_state *fs, struct form_control *fc, int utf8) { - return textarea_op(fs, fc, do_op_eob); + return textarea_op(fs, fc, utf8, do_op_eob); } enum frame_event_status -textarea_op_enter(struct form_state *fs, struct form_control *fc) +textarea_op_enter(struct form_state *fs, struct form_control *fc, int utf8) { assert(fs && fs->value && fc); if_assert_failed return FRAME_EVENT_OK; @@ -607,6 +739,7 @@ struct form_control *fc; struct form_state *fs; struct link *link; + int utf8 = doc_view->document->options.utf8; assert(doc_view && doc_view->vs && doc_view->document); assert(direction == 1 || direction == -1); @@ -628,7 +761,7 @@ /* Depending on which way we entered the textarea move cursor so that * it is available at end or start. 
*/ if (direction == 1) - textarea_op_eob(fs, fc); + textarea_op_eob(fs, fc, utf8); else - textarea_op_bob(fs, fc); + textarea_op_bob(fs, fc, utf8); } diff -Nru -x '*.orig' -x elinks -x '*.o' elinks/src/viewer/text/textarea.h elinks.2/src/viewer/text/textarea.h --- elinks/src/viewer/text/textarea.h 2005-12-02 10:57:11.042799250 +0100 +++ elinks.2/src/viewer/text/textarea.h 2005-12-22 17:54:01.067044750 +0100 @@ -13,7 +13,7 @@ struct session; struct terminal; -int area_cursor(struct form_control *fc, struct form_state *fs); +int area_cursor(struct form_control *fc, struct form_state *fs, int utf8); void draw_textarea(struct terminal *term, struct form_state *fs, struct document_view *doc_view, struct link *link); unsigned char *encode_textarea(struct submitted_value *sv); @@ -21,13 +21,13 @@ void textarea_edit(int, struct terminal *, struct form_state *, struct document_view *, struct link *); void menu_textarea_edit(struct terminal *term, void *xxx, void *ses_); -enum frame_event_status textarea_op_home(struct form_state *fs, struct form_control *fc); -enum frame_event_status textarea_op_up(struct form_state *fs, struct form_control *fc); -enum frame_event_status textarea_op_down(struct form_state *fs, struct form_control *fc); -enum frame_event_status textarea_op_end(struct form_state *fs, struct form_control *fc); -enum frame_event_status textarea_op_bob(struct form_state *fs, struct form_control *fc); -enum frame_event_status textarea_op_eob(struct form_state *fs, struct form_control *fc); -enum frame_event_status textarea_op_enter(struct form_state *fs, struct form_control *fc); +enum frame_event_status textarea_op_home(struct form_state *fs, struct form_control *fc, int utf8); +enum frame_event_status textarea_op_up(struct form_state *fs, struct form_control *fc, int utf8); +enum frame_event_status textarea_op_down(struct form_state *fs, struct form_control *fc, int utf8); +enum frame_event_status textarea_op_end(struct form_state *fs, struct form_control *fc, 
int utf8); +enum frame_event_status textarea_op_bob(struct form_state *fs, struct form_control *fc, int utf8); +enum frame_event_status textarea_op_eob(struct form_state *fs, struct form_control *fc, int utf8); +enum frame_event_status textarea_op_enter(struct form_state *fs, struct form_control *fc, int utf8); void set_textarea(struct document_view *doc_view, int direction);