/*
 * Patch summary: fold the separate letter and modifier tables used for
 * Devanagari -> IAST transliteration into one flagged table.
 *
 * iast.c
 *   - Removes transliteration_modifier_apply() and the thirteen wrapper
 *     functions that called it with a fixed replacement string.
 *   - Replaces table_letters[] and table_modifiers[] with a single table[]
 *     of {code, flags, data} entries, terminated by {0, 0, NULL}.
 *     FLAG_REGULAR marks vowels, consonants, codas and digits;
 *     FLAG_MODIFIER marks the thirteen diacritic modifiers (the entry for
 *     0x094d carries the empty string "").
 *   - transliteration_context_iast_alloc() now sets only context->table.
 *
 * transliteration.c
 *   - find_letter_by_code() becomes static; find_modifier_by_code() is
 *     removed.
 *   - Adds static syllable_modify(), which carries the logic of the removed
 *     apply helper: copy syllable->data into a local buffer, cut off its
 *     last byte, append the modifier text, then free the old string and
 *     store a strdup() of the buffer.
 *   - The conversion loop does one lookup in context->table and branches on
 *     letter->flags: FLAG_REGULAR -> syllable_append(tail, ...),
 *     FLAG_MODIFIER -> syllable_modify(tail, ...).
 *
 * transliteration.h
 *   - Adds FLAG_REGULAR / FLAG_MODIFIER and the `flags` member of
 *     struct transliteration_letter; removes syllable_modification_t and
 *     struct transliteration_modifier; the context keeps a single `table`.
 *
 * NOTE(review): FLAG_REGULAR and FLAG_MODIFIER expand to `1 << 0` and
 * `1 << 1` without parentheses. The uses in this patch
 * (`letter->flags & FLAG_...`) parse as intended because `<<` binds tighter
 * than `&`, but an expression such as `~FLAG_MODIFIER` would not; consider
 * `(1 << 0)` / `(1 << 1)`.
 *
 * NOTE(review): syllable_modify() fills a 10-byte stack buffer with
 * strcpy()/strcat() and performs no length check. `strlen(buffer) - 1`
 * also wraps around when syllable->data is "". The loop starts with
 * tail == head == syllable_alloc(""), so this looks reachable when the
 * input begins with a modifier sign -- TODO confirm against
 * syllable_alloc().
 *
 * NOTE(review): transliteration_context_iast_alloc() writes through the
 * malloc() result without checking it for NULL.
 */
diff --git a/iast.c b/iast.c index 4c957b1..3c0cf2e 100644 --- a/iast.c +++ b/iast.c @@ -2,175 +2,97 @@ #include #include + #include "iast.h" - -static inline void transliteration_modifier_apply(struct syllable *syllable, - const char *modified) -{ - char buffer[10]; - - strcpy(buffer, syllable->data); - buffer[strlen(buffer) - 1] = 0; - strcat(buffer, modified); - - free(syllable->data); - syllable->data = strdup(buffer); -} - -static void transliteration_modifier_aa(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ā"); -} - -static void transliteration_modifier_i(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "i"); -} - -static void transliteration_modifier_ii(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ī"); -} - -static void transliteration_modifier_u(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "u"); -} - -static void transliteration_modifier_uu(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ū"); -} - -static void transliteration_modifier_r(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ṛ"); -} - -static void transliteration_modifier_rr(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ṝ"); -} - -static void transliteration_modifier_l(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ḷ"); -} - -static void transliteration_modifier_ll(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ḹ"); -} - -static void transliteration_modifier_e(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "e"); -} - -static void transliteration_modifier_o(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "o"); -} - -static void transliteration_modifier_ai(struct syllable *syllable) -{ - transliteration_modifier_apply(syllable, "ai"); -} - -static void transliteration_modifier_virama(struct syllable *syllable) 
-{ - transliteration_modifier_apply(syllable, ""); -} +#include "transliteration.h" -static const struct transliteration_letter table_letters[] = { +static const struct transliteration_letter table[] = { /* Vowels */ - {0x0905, "a"}, /* अ */ - {0x0906, "ā"}, /* आ */ - {0x0907, "i"}, /* इ */ - {0x0908, "ī"}, /* ई */ - {0x0909, "u"}, /* उ */ - {0x090a, "ū"}, /* ऊ */ - {0x090b, "ṛ"}, /* ऋ */ - {0x0960, "ṝ"}, /* ॠ */ - {0x090c, "ḷ"}, /* ऌ */ - {0x0961, "ḹ"}, /* ॡ */ - {0x090f, "e"}, /* ए */ - {0x0910, "ai"}, /* ऐ */ - {0x0913, "o"}, /* ओ */ - {0x0914, "au"}, /* औ */ + {0x0905, FLAG_REGULAR, "a"}, /* अ */ + {0x0906, FLAG_REGULAR, "ā"}, /* आ */ + {0x0907, FLAG_REGULAR, "i"}, /* इ */ + {0x0908, FLAG_REGULAR, "ī"}, /* ई */ + {0x0909, FLAG_REGULAR, "u"}, /* उ */ + {0x090a, FLAG_REGULAR, "ū"}, /* ऊ */ + {0x090b, FLAG_REGULAR, "ṛ"}, /* ऋ */ + {0x0960, FLAG_REGULAR, "ṝ"}, /* ॠ */ + {0x090c, FLAG_REGULAR, "ḷ"}, /* ऌ */ + {0x0961, FLAG_REGULAR, "ḹ"}, /* ॡ */ + {0x090f, FLAG_REGULAR, "e"}, /* ए */ + {0x0910, FLAG_REGULAR, "ai"}, /* ऐ */ + {0x0913, FLAG_REGULAR, "o"}, /* ओ */ + {0x0914, FLAG_REGULAR, "au"}, /* औ */ /* Consonants */ - {0x0915, "ka"}, /* क */ - {0x0916, "kha"}, /* ख */ - {0x0917, "ga"}, /* ग */ - {0x0918, "gha"}, /* घ */ - {0x0919, "ṅa"}, /* ङ */ - {0x0939, "ha"}, /* ह */ - {0x091a, "ca"}, /* च */ - {0x091b, "cha"}, /* छ */ - {0x091c, "ja"}, /* ज */ - {0x091d, "jha"}, /* झ */ - {0x091e, "ña"}, /* ञ */ - {0x092f, "ya"}, /* य */ - {0x0936, "śa"}, /* श */ - {0x091F, "ṭa"}, /* ट */ - {0x0920, "ṭha"}, /* ठ */ - {0x0921, "ḍa"}, /* ड */ - {0x0922, "ḍha"}, /* ढ */ - {0x0923, "ṇa"}, /* ण */ - {0x0930, "ra"}, /* र */ - {0x0937, "ṣa"}, /* श */ - {0x0924, "ta"}, /* त */ - {0x0925, "tha"}, /* थ */ - {0x0926, "da"}, /* द */ - {0x0927, "dha"}, /* ध */ - {0x0928, "na"}, /* न */ - {0x0932, "la"}, /* ल */ - {0x0938, "sa"}, /* स */ - {0x092a, "pa"}, /* प */ - {0x092b, "pha"}, /* फ */ - {0x092c, "ba"}, /* ब */ - {0x092d, "bha"}, /* भ */ - {0x092e, "ma"}, /* म */ - {0x0935, "va"}, /* व 
*/ + {0x0915, FLAG_REGULAR, "ka"}, /* क */ + {0x0916, FLAG_REGULAR, "kha"}, /* ख */ + {0x0917, FLAG_REGULAR, "ga"}, /* ग */ + {0x0918, FLAG_REGULAR, "gha"}, /* घ */ + {0x0919, FLAG_REGULAR, "ṅa"}, /* ङ */ + {0x0939, FLAG_REGULAR, "ha"}, /* ह */ + {0x091a, FLAG_REGULAR, "ca"}, /* च */ + {0x091b, FLAG_REGULAR, "cha"}, /* छ */ + {0x091c, FLAG_REGULAR, "ja"}, /* ज */ + {0x091d, FLAG_REGULAR, "jha"}, /* झ */ + {0x091e, FLAG_REGULAR, "ña"}, /* ञ */ + {0x092f, FLAG_REGULAR, "ya"}, /* य */ + {0x0936, FLAG_REGULAR, "śa"}, /* श */ + {0x091F, FLAG_REGULAR, "ṭa"}, /* ट */ + {0x0920, FLAG_REGULAR, "ṭha"}, /* ठ */ + {0x0921, FLAG_REGULAR, "ḍa"}, /* ड */ + {0x0922, FLAG_REGULAR, "ḍha"}, /* ढ */ + {0x0923, FLAG_REGULAR, "ṇa"}, /* ण */ + {0x0930, FLAG_REGULAR, "ra"}, /* र */ + {0x0937, FLAG_REGULAR, "ṣa"}, /* ष */ + {0x0924, FLAG_REGULAR, "ta"}, /* त */ + {0x0925, FLAG_REGULAR, "tha"}, /* थ */ + {0x0926, FLAG_REGULAR, "da"}, /* द */ + {0x0927, FLAG_REGULAR, "dha"}, /* ध */ + {0x0928, FLAG_REGULAR, "na"}, /* न */ + {0x0932, FLAG_REGULAR, "la"}, /* ल */ + {0x0938, FLAG_REGULAR, "sa"}, /* स */ + {0x092a, FLAG_REGULAR, "pa"}, /* प */ + {0x092b, FLAG_REGULAR, "pha"}, /* फ */ + {0x092c, FLAG_REGULAR, "ba"}, /* ब */ + {0x092d, FLAG_REGULAR, "bha"}, /* भ */ + {0x092e, FLAG_REGULAR, "ma"}, /* म */ + {0x0935, FLAG_REGULAR, "va"}, /* व */ /* Codas */ - {0x0902, "ṃ"}, /* ं (anusvara) */ - {0x0903, "ḥ"}, /* ः (visarga) */ - {0x093d, "'"}, /* ऽ (avagrada) */ + {0x0902, FLAG_REGULAR, "ṃ"}, /* ं (anusvara) */ + {0x0903, FLAG_REGULAR, "ḥ"}, /* ः (visarga) */ + {0x093d, FLAG_REGULAR, "'"}, /* ऽ (avagraha) */ /* Numbers */ - {0x0966, "0"}, - {0x0967, "1"}, - {0x0968, "2"}, - {0x0969, "3"}, - {0x096a, "4"}, - {0x096b, "5"}, - {0x096c, "6"}, - {0x096d, "7"}, - {0x096e, "8"}, - {0x096f, "9"}, + {0x0966, FLAG_REGULAR, "0"}, + {0x0967, FLAG_REGULAR, "1"}, + {0x0968, FLAG_REGULAR, "2"}, + {0x0969, FLAG_REGULAR, "3"}, + {0x096a, FLAG_REGULAR, "4"}, + {0x096b, FLAG_REGULAR, "5"}, + {0x096c, FLAG_REGULAR, 
"6"}, + {0x096d, FLAG_REGULAR, "7"}, + {0x096e, FLAG_REGULAR, "8"}, + {0x096f, FLAG_REGULAR, "9"}, - {0, NULL} -}; + /* Diacritic modifiers */ + {0x093e, FLAG_MODIFIER, "ā"}, + {0x093f, FLAG_MODIFIER, "i"}, + {0x0940, FLAG_MODIFIER, "ī"}, + {0x0941, FLAG_MODIFIER, "u"}, + {0x0942, FLAG_MODIFIER, "ū"}, + {0x0943, FLAG_MODIFIER, "ṛ"}, + {0x0944, FLAG_MODIFIER, "ṝ"}, + {0x0962, FLAG_MODIFIER, "ḷ"}, + {0x0963, FLAG_MODIFIER, "ḹ"}, + {0x0947, FLAG_MODIFIER, "e"}, + {0x094b, FLAG_MODIFIER, "o"}, + {0x0948, FLAG_MODIFIER, "ai"}, + {0x094d, FLAG_MODIFIER, ""}, -static const struct transliteration_modifier table_modifiers[] = { - {0x093e, transliteration_modifier_aa}, - {0x093f, transliteration_modifier_i}, - {0x0940, transliteration_modifier_ii}, - {0x0941, transliteration_modifier_u}, - {0x0942, transliteration_modifier_uu}, - {0x0943, transliteration_modifier_r}, - {0x0944, transliteration_modifier_rr}, - {0x0962, transliteration_modifier_l}, - {0x0963, transliteration_modifier_ll}, - {0x0947, transliteration_modifier_e}, - {0x094b, transliteration_modifier_o}, - {0x0948, transliteration_modifier_ai}, - {0x094d, transliteration_modifier_virama}, - {0, NULL} + {0, 0, NULL} }; struct transliteration_context *transliteration_context_iast_alloc() @@ -178,8 +100,7 @@ struct transliteration_context *transliteration_context_iast_alloc() struct transliteration_context *context; context = malloc(sizeof(*context)); - context->table_letters = table_letters; - context->table_modifiers = table_modifiers; + context->table = table; return context; } diff --git a/transliteration.c b/transliteration.c index 22acb15..6524d64 100644 --- a/transliteration.c +++ b/transliteration.c @@ -7,7 +7,7 @@ #include "syllable.h" #include "utf8.h" -const struct transliteration_letter *find_letter_by_code(unsigned long c, +static const struct transliteration_letter *find_letter_by_code(unsigned long c, const struct transliteration_letter *table) { const struct transliteration_letter *walk = table; @@ 
-24,21 +24,16 @@ const struct transliteration_letter *find_letter_by_code(unsigned long c, return NULL; } -const struct transliteration_modifier *find_modifier_by_code(unsigned long c, - const struct transliteration_modifier *table) +static void syllable_modify(struct syllable *syllable, const char *data) { - const struct transliteration_modifier *walk = table; + char buffer[10]; - if (c == 0) - return NULL; + strcpy(buffer, syllable->data); + buffer[strlen(buffer) - 1] = 0; + strcat(buffer, data); - while (walk->code != 0) { - if (c == walk->code) - return walk; - walk++; - } - - return NULL; + free(syllable->data); + syllable->data = strdup(buffer); } char *transliterate_devanagari_to_latin(const char *text, @@ -51,7 +46,6 @@ char *transliterate_devanagari_to_latin(const char *text, unsigned long c; struct syllable *head, *tail; const struct transliteration_letter *letter; - const struct transliteration_modifier *modifier; head = syllable_alloc(""); tail = head; @@ -60,15 +54,14 @@ char *transliterate_devanagari_to_latin(const char *text, c = utf8_unpack_char(ptr); ptr += utf8_char_length(c); - letter = find_letter_by_code(c, context->table_letters); + letter = find_letter_by_code(c, context->table); if (letter != NULL) { - tail = syllable_append(tail, letter->data); - continue; - } - modifier = find_modifier_by_code(c, context->table_modifiers); - if (modifier != NULL) { - modifier->modifier(tail); + if (letter->flags & FLAG_REGULAR) + tail = syllable_append(tail, letter->data); + else if (letter->flags & FLAG_MODIFIER) + syllable_modify(tail, letter->data); + continue; } diff --git a/transliteration.h b/transliteration.h index 71471b1..408c49c 100644 --- a/transliteration.h +++ b/transliteration.h @@ -5,21 +5,18 @@ #include "syllable.h" + +#define FLAG_REGULAR 1 << 0 +#define FLAG_MODIFIER 1 << 1 + struct transliteration_letter { unsigned long code; + unsigned int flags; const char *data; }; -typedef void (*syllable_modification_t)(struct syllable *syllable); - 
-struct transliteration_modifier { - unsigned long code; - syllable_modification_t modifier; -}; - struct transliteration_context { - const struct transliteration_letter *table_letters; - const struct transliteration_modifier *table_modifiers; + const struct transliteration_letter *table; }; char *transliterate_devanagari_to_latin(const char *text,