diff --git a/Makefile.am b/Makefile.am index b71aa70..c4b266d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -49,6 +49,7 @@ libkmscon_core_la_SOURCES = \ src/eloop.c src/eloop.h \ src/vt.c src/vt.h \ src/vte.c src/vte.h \ + src/vte_charsets.c \ src/terminal.c src/terminal.h \ src/pty.c src/pty.h \ src/uterm.h src/uterm_internal.h \ diff --git a/src/vte.h b/src/vte.h index d781152..779fe65 100644 --- a/src/vte.h +++ b/src/vte.h @@ -37,6 +37,17 @@ #include "console.h" #include "unicode.h" +/* available character sets: each one is a table of 96 symbols, defined in vte_charsets.c */ + +typedef kmscon_symbol_t kmscon_vte_charset[96]; + +extern kmscon_vte_charset kmscon_vte_unicode_lower; +extern kmscon_vte_charset kmscon_vte_unicode_upper; +extern kmscon_vte_charset kmscon_vte_dec_supplemental_graphics; +extern kmscon_vte_charset kmscon_vte_dec_special_graphics; + +/* virtual terminal emulator */ + struct kmscon_vte; enum kmscon_vte_keyboard_action { diff --git a/src/vte_charsets.c b/src/vte_charsets.c new file mode 100644 index 0000000..7eedd71 --- /dev/null +++ b/src/vte_charsets.c @@ -0,0 +1,501 @@ +/* + * kmscon - VT Emulator + * + * Copyright (c) 2012 David Herrmann + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * VTE Character Sets + * These are predefined character sets that can be loaded into GL and GR. By + * default we use unicode_lower and unicode_upper, that is, both sets have the + * exact unicode mapping. unicode_lower is effectively ASCII and unicode_upper + * as defined by the unicode standard. + * Several other character sets are defined here. However, all of them are + * limited to the 96 character space of GL or GR. Everything beyond GR (which + * was not supported by the classic VTs by DEC but is available in VT emulators + * that support unicode/UTF8) is always mapped to unicode and cannot be changed + * by these character sets. Even mapping GL and GR is only available for + * backwards compatibility as new applications can use the Unicode functionality + * of the VTE. + * + * Moreover, mapping GR is almost unnecessary to support. In fact, Unicode UTF-8 + * support in VTE works by reading all incoming data as a UTF-8 stream. This + * maps GL/ASCII to ASCII, as UTF-8 is backwards compatible to ASCII, however, + * everything that has the 8th bit set is a >=2-byte character in UTF-8. That is, + * this is in no way backwards compatible to >=VT220 8bit support. Therefore, if + * someone maps a character set into GR and wants to use them with this VTE, + * then they must already send UTF-8 characters to use GR (all GR characters are + * 8-bits). Hence, they can easily also send the correct UTF-8 character for the + * unicode mapping. + * The only advantage is that most characters in many sets are 3-byte UTF-8 + * characters and by mapping the set into GR/GL you can use 2 or 1 byte UTF-8 + * characters which saves bandwidth. 
+ * Another reason is, if you have older applications that use the VT220 8-bit + * support and you put an ASCII/8bit-extension to UTF-8 converter in between, you + * need these mappings to have the application behave correctly if it uses GL/GR + * mappings extensively. + * + * Anyway, we support GL/GR mappings so here are the most commonly used maps as + * defined by Unicode-standard, DEC-private maps and other famous charmaps. + * + * Characters 1-32 are always the control characters (part of CL) and cannot be + * mapped. Characters 34-127 (94 characters) are part of GL and can be mapped. + * Characters 33 and 128 are not part of GL and always mapped by VTE but are + * included here in the maps for alignment reasons but always set to 0. + */ +/* NOTE(review): the header names of the next three #include lines are missing in this copy of the patch - restore them before applying */ +#include +#include +#include + +#include "log.h" +#include "unicode.h" +#include "vte.h" + +#define LOG_SUBSYSTEM "vte" + +/* + * Lower Unicode character set. This maps the characters to the basic ASCII + * characters 33-126. These are all graphics characters defined in ASCII. The + * first and last entries are never used so we can safely set them to anything. 
+ */ +kmscon_vte_charset kmscon_vte_unicode_lower = { /* entry i holds code point 32 + i for i = 1..94; entries 0 and 95 are 0 */ + [0] = 0, + [1] = 33, + [2] = 34, + [3] = 35, + [4] = 36, + [5] = 37, + [6] = 38, + [7] = 39, + [8] = 40, + [9] = 41, + [10] = 42, + [11] = 43, + [12] = 44, + [13] = 45, + [14] = 46, + [15] = 47, + [16] = 48, + [17] = 49, + [18] = 50, + [19] = 51, + [20] = 52, + [21] = 53, + [22] = 54, + [23] = 55, + [24] = 56, + [25] = 57, + [26] = 58, + [27] = 59, + [28] = 60, + [29] = 61, + [30] = 62, + [31] = 63, + [32] = 64, + [33] = 65, + [34] = 66, + [35] = 67, + [36] = 68, + [37] = 69, + [38] = 70, + [39] = 71, + [40] = 72, + [41] = 73, + [42] = 74, + [43] = 75, + [44] = 76, + [45] = 77, + [46] = 78, + [47] = 79, + [48] = 80, + [49] = 81, + [50] = 82, + [51] = 83, + [52] = 84, + [53] = 85, + [54] = 86, + [55] = 87, + [56] = 88, + [57] = 89, + [58] = 90, + [59] = 91, + [60] = 92, + [61] = 93, + [62] = 94, + [63] = 95, + [64] = 96, + [65] = 97, + [66] = 98, + [67] = 99, + [68] = 100, + [69] = 101, + [70] = 102, + [71] = 103, + [72] = 104, + [73] = 105, + [74] = 106, + [75] = 107, + [76] = 108, + [77] = 109, + [78] = 110, + [79] = 111, + [80] = 112, + [81] = 113, + [82] = 114, + [83] = 115, + [84] = 116, + [85] = 117, + [86] = 118, + [87] = 119, + [88] = 120, + [89] = 121, + [90] = 122, + [91] = 123, + [92] = 124, + [93] = 125, + [94] = 126, + [95] = 0, +}; + +/* + * Upper Unicode Table + * This maps all characters to the upper unicode characters 161-254. These are + * not compatible with any older 8 bit character sets. See the Unicode standard + * for the definitions of each symbol. Again, the first and last entries are never + * used so set them to 0. 
+ */ +kmscon_vte_charset kmscon_vte_unicode_upper = { /* entry i holds code point 160 + i for i = 1..94; entries 0 and 95 are 0 */ + [0] = 0, + [1] = 161, + [2] = 162, + [3] = 163, + [4] = 164, + [5] = 165, + [6] = 166, + [7] = 167, + [8] = 168, + [9] = 169, + [10] = 170, + [11] = 171, + [12] = 172, + [13] = 173, + [14] = 174, + [15] = 175, + [16] = 176, + [17] = 177, + [18] = 178, + [19] = 179, + [20] = 180, + [21] = 181, + [22] = 182, + [23] = 183, + [24] = 184, + [25] = 185, + [26] = 186, + [27] = 187, + [28] = 188, + [29] = 189, + [30] = 190, + [31] = 191, + [32] = 192, + [33] = 193, + [34] = 194, + [35] = 195, + [36] = 196, + [37] = 197, + [38] = 198, + [39] = 199, + [40] = 200, + [41] = 201, + [42] = 202, + [43] = 203, + [44] = 204, + [45] = 205, + [46] = 206, + [47] = 207, + [48] = 208, + [49] = 209, + [50] = 210, + [51] = 211, + [52] = 212, + [53] = 213, + [54] = 214, + [55] = 215, + [56] = 216, + [57] = 217, + [58] = 218, + [59] = 219, + [60] = 220, + [61] = 221, + [62] = 222, + [63] = 223, + [64] = 224, + [65] = 225, + [66] = 226, + [67] = 227, + [68] = 228, + [69] = 229, + [70] = 230, + [71] = 231, + [72] = 232, + [73] = 233, + [74] = 234, + [75] = 235, + [76] = 236, + [77] = 237, + [78] = 238, + [79] = 239, + [80] = 240, + [81] = 241, + [82] = 242, + [83] = 243, + [84] = 244, + [85] = 245, + [86] = 246, + [87] = 247, + [88] = 248, + [89] = 249, + [90] = 250, + [91] = 251, + [92] = 252, + [93] = 253, + [94] = 254, + [95] = 0, +}; + +/* + * The DEC supplemental graphics set. For its definition see here: + * http://vt100.net/docs/vt220-rm/table2-3b.html + * It's basically a mixture of common European symbols that are not part of + * ASCII. Most often, this is mapped into GR to extend the basic ASCII part. + * + * This is very similar to unicode_upper, however, a few symbols differ so do not + * mix them up! 
+ */ +kmscon_vte_charset kmscon_vte_dec_supplemental_graphics = { /* same 160 + i layout as unicode_upper except where annotated; 0 marks a slot with no symbol */ + [0] = 0, + [1] = 161, + [2] = 162, + [3] = 163, + [4] = 0, + [5] = 165, + [6] = 0, + [7] = 167, + [8] = 164, /* U+00A4 currency sign (unicode_upper has 168 here) */ + [9] = 169, + [10] = 170, + [11] = 171, + [12] = 0, + [13] = 0, + [14] = 0, + [15] = 0, + [16] = 176, + [17] = 177, + [18] = 178, + [19] = 179, + [20] = 0, + [21] = 181, + [22] = 182, + [23] = 183, + [24] = 0, + [25] = 185, + [26] = 186, + [27] = 187, + [28] = 188, + [29] = 189, + [30] = 0, + [31] = 191, + [32] = 192, + [33] = 193, + [34] = 194, + [35] = 195, + [36] = 196, + [37] = 197, + [38] = 198, + [39] = 199, + [40] = 200, + [41] = 201, + [42] = 202, + [43] = 203, + [44] = 204, + [45] = 205, + [46] = 206, + [47] = 207, + [48] = 0, + [49] = 209, + [50] = 210, + [51] = 211, + [52] = 212, + [53] = 213, + [54] = 214, + [55] = 338, /* U+0152 latin capital ligature OE */ + [56] = 216, + [57] = 217, + [58] = 218, + [59] = 219, + [60] = 220, + [61] = 376, /* U+0178 latin capital letter Y with diaeresis */ + [62] = 0, + [63] = 223, + [64] = 224, + [65] = 225, + [66] = 226, + [67] = 227, + [68] = 228, + [69] = 229, + [70] = 230, + [71] = 231, + [72] = 232, + [73] = 233, + [74] = 234, + [75] = 235, + [76] = 236, + [77] = 237, + [78] = 238, + [79] = 239, + [80] = 0, + [81] = 241, + [82] = 242, + [83] = 243, + [84] = 244, + [85] = 245, + [86] = 246, + [87] = 339, /* U+0153 latin small ligature oe */ + [88] = 248, + [89] = 249, + [90] = 250, + [91] = 251, + [92] = 252, + [93] = 255, /* U+00FF latin small letter y with diaeresis */ + [94] = 0, + [95] = 0, +}; + +/* + * DEC special graphics character set. See here for its definition: + * http://vt100.net/docs/vt220-rm/table2-4.html + * This contains several characters to create ASCII drawings and similar. It's + * commonly mapped into GR to extend the basic ASCII characters. + * + * Lower 62 characters map to ASCII 33-94, everything beyond is special and + * commonly used for ASCII drawings. It depends on the Unicode Standard 3.2 for + * the extended horizontal scan-line characters 1, 3, 7, and 9. 
+ */ +kmscon_vte_charset kmscon_vte_dec_special_graphics = { /* entries 1..62 hold code point 32 + i; entries 64..94 hold the drawing symbols annotated below */ + [0] = 0, + [1] = 33, + [2] = 34, + [3] = 35, + [4] = 36, + [5] = 37, + [6] = 38, + [7] = 39, + [8] = 40, + [9] = 41, + [10] = 42, + [11] = 43, + [12] = 44, + [13] = 45, + [14] = 46, + [15] = 47, + [16] = 48, + [17] = 49, + [18] = 50, + [19] = 51, + [20] = 52, + [21] = 53, + [22] = 54, + [23] = 55, + [24] = 56, + [25] = 57, + [26] = 58, + [27] = 59, + [28] = 60, + [29] = 61, + [30] = 62, + [31] = 63, + [32] = 64, + [33] = 65, + [34] = 66, + [35] = 67, + [36] = 68, + [37] = 69, + [38] = 70, + [39] = 71, + [40] = 72, + [41] = 73, + [42] = 74, + [43] = 75, + [44] = 76, + [45] = 77, + [46] = 78, + [47] = 79, + [48] = 80, + [49] = 81, + [50] = 82, + [51] = 83, + [52] = 84, + [53] = 85, + [54] = 86, + [55] = 87, + [56] = 88, + [57] = 89, + [58] = 90, + [59] = 91, + [60] = 92, + [61] = 93, + [62] = 94, + [63] = 0, /* set to 0, like the unused first and last slots */ + [64] = 9830, /* U+2666 black diamond suit */ + [65] = 9618, /* U+2592 medium shade */ + [66] = 9225, /* U+2409 symbol for horizontal tabulation */ + [67] = 9228, /* U+240C symbol for form feed */ + [68] = 9229, /* U+240D symbol for carriage return */ + [69] = 9226, /* U+240A symbol for line feed */ + [70] = 176, /* U+00B0 degree sign */ + [71] = 177, /* U+00B1 plus-minus sign */ + [72] = 9252, /* U+2424 symbol for newline */ + [73] = 9227, /* U+240B symbol for vertical tabulation */ + [74] = 9496, /* U+2518 box drawings light up and left */ + [75] = 9488, /* U+2510 box drawings light down and left */ + [76] = 9484, /* U+250C box drawings light down and right */ + [77] = 9492, /* U+2514 box drawings light up and right */ + [78] = 9532, /* U+253C box drawings light vertical and horizontal */ + [79] = 9146, /* U+23BA horizontal scan line-1 */ + [80] = 9147, /* U+23BB horizontal scan line-3 */ + [81] = 9472, /* U+2500 box drawings light horizontal */ + [82] = 9148, /* U+23BC horizontal scan line-7 */ + [83] = 9149, /* U+23BD horizontal scan line-9 */ + [84] = 9500, /* U+251C box drawings light vertical and right */ + [85] = 9508, /* U+2524 box drawings light vertical and left */ + [86] = 9524, /* U+2534 box drawings light up and horizontal */ + [87] = 9516, /* U+252C box drawings light down and horizontal */ + [88] = 9474, /* U+2502 box drawings light vertical */ + [89] = 8804, /* U+2264 less-than or equal to */ + [90] = 8805, /* U+2265 greater-than or equal to */ + [91] = 960, /* U+03C0 greek small letter pi */ + [92] = 8800, /* U+2260 not equal to */ + [93] = 163, /* U+00A3 pound sign */ + [94] = 8901, /* U+22C5 dot operator */ + [95] = 0, +};