diff --git a/src/unicode.c b/src/unicode.c index 3fe6d99..0b14915 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -25,7 +25,7 @@ */ /* - * This kmscon-utf8-state-machine is based on the wayland-compositor demos: + * The tsm-utf8-state-machine is based on the wayland-compositor demos: * * Copyright © 2008 Kristian Høgsberg * @@ -49,8 +49,43 @@ */ /* - * Unicode Handling - * Main implementation of the symbol datatype. The symbol table contains two-way + * Unicode Helpers + * This implements several helpers for Unicode/UTF8/UCS4 input and output. See + * below for comments on each helper. + */ + +#include +#include +#include +#include +#include +#include "log.h" +#include "static_misc.h" +#include "unicode.h" + +#define LOG_SUBSYSTEM "unicode" + +/* + * Unicode Symbol Handling + * The main goal of the kmscon_symbol_* functions is to provide a datatype which + * can contain the representation of any printable character. This includes all + * basic Unicode characters but also combined characters. + * To avoid all the memory management we still represent a character as a single + * integer value (kmscon_symbol_t) but internally we allocate a string which is + * represented by this value. + * + * A kmscon_symbol_t is an integer which represents a single character point. + * For most Unicode characters this is simply the UCS4 representation. In fact, + * every UCS4 character is a valid kmscon_symbol_t object. + * However, the Unicode standard allows combining marks. Therefore, some characters + * consist of more than one Unicode character. + * A global symbol-table provides all those combined characters as single + * integers. You simply create a valid base character and append your combining + * marks and the table will return a new valid kmscon_symbol_t. It is no longer + * a valid UCS4 value, though. But no memory management is needed as all + * kmscon_symbol_t objects are simple integers. + * + * The symbol table contains two-way * references. 
The Hash Table contains all the symbols with the symbol ucs4 * string as key and the symbol ID as value. * The index array contains the symbol ID as key and a pointer to the ucs4 @@ -64,19 +99,6 @@ * push the new symbol into the symbol table. */ -/* TODO: Remove the glib dependencies */ - -#include -#include -#include -#include -#include -#include "log.h" -#include "static_misc.h" -#include "unicode.h" - -#define LOG_SUBSYSTEM "unicode" - #define KMSCON_UCS4_MAXLEN 10 #define KMSCON_UCS4_MAX 0x7fffffffUL #define KMSCON_UCS4_INVALID 0xfffd diff --git a/src/unicode.h b/src/unicode.h index 3b69e64..84312ca 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -25,24 +25,9 @@ */ /* - * Unicode Handling - * The main goal of the kmscon_symbol_* functions is to provide a datatype which - * can contain the representation of any printable character. This includes all - * basic Unicode characters but also combined characters. - * To avoid all the memory management we still represent a character as a single - * integer value (kmscon_symbol_t) but internally we allocate a string which is - * represented by this value. - * - * A kmscon_symbol_t is an integer which represents a single character point. - * For most Unicode characters this is simply the UCS4 representation. In fact, - * every UCS4 characters is a valid kmscon_symbol_t object. - * However, Unicode standard allows combining marks. Therefore, some characters - * consists of more than one Unicode character. - * A global symbol-table provides all those combined characters as single - * integers. You simply create a valid base character and append your combining - * marks and the table will return a new valid kmscon_symbol_t. It is no longer - * a valid UCS4 value, though. But no memory management is needed as all - * kmscon_symbol_t objects are simple integers. + * Unicode Helpers + * This file provides small helpers to make working with Unicode/UTF8/UCS4 input + * and output much easier. 
*/ #ifndef KMSCON_UNICODE_H @@ -51,6 +36,8 @@ #include #include +/* UCS4 helpers */ + #define TSM_UCS4_MAX (0x7fffffffUL) #define TSM_UCS4_INVALID (TSM_UCS4_MAX + 1) #define TSM_UCS4_REPLACEMENT (0xfffdUL)