transliteration: refactor modifiers

This commit is contained in:
Vlasta Vesely 2018-04-27 18:58:07 +02:00
parent 69057f0e32
commit bd0b64f91d
3 changed files with 99 additions and 188 deletions

237
iast.c
View file

@ -2,175 +2,97 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "iast.h" #include "iast.h"
#include "transliteration.h"
/*
 * Replace the last byte of syllable->data with the string `modified`.
 *
 * The callers in this file use it to swap the trailing byte of a syllable
 * (e.g. the final "a" of "ka") for a vowel-sign transliteration, or to drop
 * it entirely when `modified` is "".
 *
 * The result is built in a heap buffer sized from the actual inputs, so
 * neither a long syllable nor a long replacement can overflow. An empty
 * syllable->data has no byte to drop; `modified` is then simply appended.
 *
 * syllable: syllable whose data is rewritten; its old data is freed.
 * modified: NUL-terminated replacement for the last byte (may be "").
 *
 * On NULL arguments or allocation failure the syllable is left untouched.
 *
 * NOTE(review): only one byte is removed, so this presumably relies on the
 * syllable ending in a single-byte character -- confirm against callers.
 */
static inline void transliteration_modifier_apply(struct syllable *syllable,
		const char *modified)
{
	size_t keep_len;
	size_t suffix_len;
	char *result;

	if (syllable == NULL || syllable->data == NULL || modified == NULL)
		return;

	keep_len = strlen(syllable->data);
	if (keep_len > 0)
		keep_len--;	/* drop the trailing byte, if there is one */
	suffix_len = strlen(modified);

	result = malloc(keep_len + suffix_len + 1);
	if (result == NULL)
		return;

	memcpy(result, syllable->data, keep_len);
	/* +1 copies the terminating NUL of `modified` as well */
	memcpy(result + keep_len, modified, suffix_len + 1);

	free(syllable->data);
	syllable->data = result;
}
/* Vowel sign 0x093e: swap the syllable's last byte for long "ā". */
static void transliteration_modifier_aa(struct syllable *syllable)
{
	static const char vowel[] = "ā";

	transliteration_modifier_apply(syllable, vowel);
}
/* Vowel sign 0x093f: swap the syllable's last byte for short "i". */
static void transliteration_modifier_i(struct syllable *syllable)
{
	static const char vowel[] = "i";

	transliteration_modifier_apply(syllable, vowel);
}
/* Vowel sign 0x0940: swap the syllable's last byte for long "ī". */
static void transliteration_modifier_ii(struct syllable *syllable)
{
	static const char vowel[] = "ī";

	transliteration_modifier_apply(syllable, vowel);
}
/* Vowel sign 0x0941: swap the syllable's last byte for short "u". */
static void transliteration_modifier_u(struct syllable *syllable)
{
	static const char vowel[] = "u";

	transliteration_modifier_apply(syllable, vowel);
}
/* Vowel sign 0x0942: swap the syllable's last byte for long "ū". */
static void transliteration_modifier_uu(struct syllable *syllable)
{
	static const char vowel[] = "ū";

	transliteration_modifier_apply(syllable, vowel);
}
/*
 * Vowel sign 0x0943 (vocalic r): swap the syllable's last byte for "ṛ".
 *
 * The replacement used to be an empty string, which made this modifier
 * indistinguishable from the virama (the vowel was dropped and nothing was
 * written in its place). IAST writes vocalic r as "ṛ".
 */
static void transliteration_modifier_r(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ṛ");
}
/*
 * Vowel sign 0x0944 (long vocalic r): swap the syllable's last byte for "ṝ".
 *
 * The replacement used to be an empty string, so the vowel was silently
 * lost from the output. IAST writes long vocalic r as "ṝ".
 */
static void transliteration_modifier_rr(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ṝ");
}
/*
 * Vowel sign 0x0962 (vocalic l): swap the syllable's last byte for "ḷ".
 *
 * The replacement used to be an empty string, so the vowel was silently
 * lost from the output. IAST writes vocalic l as "ḷ".
 */
static void transliteration_modifier_l(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ḷ");
}
/*
 * Vowel sign 0x0963 (long vocalic l): swap the syllable's last byte for "ḹ".
 *
 * The replacement used to be an empty string, so the vowel was silently
 * lost from the output. IAST writes long vocalic l as "ḹ".
 */
static void transliteration_modifier_ll(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ḹ");
}
/* Vowel sign 0x0947: swap the syllable's last byte for "e". */
static void transliteration_modifier_e(struct syllable *syllable)
{
	static const char vowel[] = "e";

	transliteration_modifier_apply(syllable, vowel);
}
/* Vowel sign 0x094b: swap the syllable's last byte for "o". */
static void transliteration_modifier_o(struct syllable *syllable)
{
	static const char vowel[] = "o";

	transliteration_modifier_apply(syllable, vowel);
}
/* Vowel sign 0x0948: swap the syllable's last byte for the diphthong "ai". */
static void transliteration_modifier_ai(struct syllable *syllable)
{
	static const char vowel[] = "ai";

	transliteration_modifier_apply(syllable, vowel);
}
/*
 * Sign 0x094d (virama): drop the syllable's last byte and put nothing in
 * its place, which is why the replacement is deliberately empty.
 */
static void transliteration_modifier_virama(struct syllable *syllable)
{
	static const char nothing[] = "";

	transliteration_modifier_apply(syllable, nothing);
}
static const struct transliteration_letter table_letters[] = { static const struct transliteration_letter table[] = {
/* Vowels */ /* Vowels */
{0x0905, "a"}, /* अ */ {0x0905, FLAG_REGULAR, "a"}, /* अ */
{0x0906, "ā"}, /* आ */ {0x0906, FLAG_REGULAR, "ā"}, /* आ */
{0x0907, "i"}, /* इ */ {0x0907, FLAG_REGULAR, "i"}, /* इ */
{0x0908, "ī"}, /* ई */ {0x0908, FLAG_REGULAR, "ī"}, /* ई */
{0x0909, "u"}, /* उ */ {0x0909, FLAG_REGULAR, "u"}, /* उ */
{0x090a, "ū"}, /* ऊ */ {0x090a, FLAG_REGULAR, "ū"}, /* ऊ */
{0x090b, ""}, /* ऋ */ {0x090b, FLAG_REGULAR, ""}, /* ऋ */
{0x0960, ""}, /* ॠ */ {0x0960, FLAG_REGULAR, ""}, /* ॠ */
{0x090c, ""}, /* ऌ */ {0x090c, FLAG_REGULAR, ""}, /* ऌ */
{0x0961, ""}, /* ॡ */ {0x0961, FLAG_REGULAR, ""}, /* ॡ */
{0x090f, "e"}, /* ए */ {0x090f, FLAG_REGULAR, "e"}, /* ए */
{0x0910, "ai"}, /* ऐ */ {0x0910, FLAG_REGULAR, "ai"}, /* ऐ */
{0x0913, "o"}, /* ओ */ {0x0913, FLAG_REGULAR, "o"}, /* ओ */
{0x0914, "au"}, /* औ */ {0x0914, FLAG_REGULAR, "au"}, /* औ */
/* Consonants */ /* Consonants */
{0x0915, "ka"}, /* क */ {0x0915, FLAG_REGULAR, "ka"}, /* क */
{0x0916, "kha"}, /* ख */ {0x0916, FLAG_REGULAR, "kha"}, /* ख */
{0x0917, "ga"}, /* ग */ {0x0917, FLAG_REGULAR, "ga"}, /* ग */
{0x0918, "gha"}, /* घ */ {0x0918, FLAG_REGULAR, "gha"}, /* घ */
{0x0919, "ṅa"}, /* ङ */ {0x0919, FLAG_REGULAR, "ṅa"}, /* ङ */
{0x0939, "ha"}, /* ह */ {0x0939, FLAG_REGULAR, "ha"}, /* ह */
{0x091a, "ca"}, /* च */ {0x091a, FLAG_REGULAR, "ca"}, /* च */
{0x091b, "cha"}, /* छ */ {0x091b, FLAG_REGULAR, "cha"}, /* छ */
{0x091c, "ja"}, /* ज */ {0x091c, FLAG_REGULAR, "ja"}, /* ज */
{0x091d, "jha"}, /* झ */ {0x091d, FLAG_REGULAR, "jha"}, /* झ */
{0x091e, "ña"}, /* ञ */ {0x091e, FLAG_REGULAR, "ña"}, /* ञ */
{0x092f, "ya"}, /* य */ {0x092f, FLAG_REGULAR, "ya"}, /* य */
{0x0936, "śa"}, /* श */ {0x0936, FLAG_REGULAR, "śa"}, /* श */
{0x091F, "ṭa"}, /* ट */ {0x091F, FLAG_REGULAR, "ṭa"}, /* ट */
{0x0920, "ṭha"}, /* ठ */ {0x0920, FLAG_REGULAR, "ṭha"}, /* ठ */
{0x0921, "ḍa"}, /* ड */ {0x0921, FLAG_REGULAR, "ḍa"}, /* ड */
{0x0922, "ḍha"}, /* ढ */ {0x0922, FLAG_REGULAR, "ḍha"}, /* ढ */
{0x0923, "ṇa"}, /* ण */ {0x0923, FLAG_REGULAR, "ṇa"}, /* ण */
{0x0930, "ra"}, /* र */ {0x0930, FLAG_REGULAR, "ra"}, /* र */
{0x0937, "ṣa"}, /* श */ {0x0937, FLAG_REGULAR, "ṣa"}, /* श */
{0x0924, "ta"}, /* त */ {0x0924, FLAG_REGULAR, "ta"}, /* त */
{0x0925, "tha"}, /* थ */ {0x0925, FLAG_REGULAR, "tha"}, /* थ */
{0x0926, "da"}, /* द */ {0x0926, FLAG_REGULAR, "da"}, /* द */
{0x0927, "dha"}, /* ध */ {0x0927, FLAG_REGULAR, "dha"}, /* ध */
{0x0928, "na"}, /* न */ {0x0928, FLAG_REGULAR, "na"}, /* न */
{0x0932, "la"}, /* ल */ {0x0932, FLAG_REGULAR, "la"}, /* ल */
{0x0938, "sa"}, /* स */ {0x0938, FLAG_REGULAR, "sa"}, /* स */
{0x092a, "pa"}, /* प */ {0x092a, FLAG_REGULAR, "pa"}, /* प */
{0x092b, "pha"}, /* फ */ {0x092b, FLAG_REGULAR, "pha"}, /* फ */
{0x092c, "ba"}, /* ब */ {0x092c, FLAG_REGULAR, "ba"}, /* ब */
{0x092d, "bha"}, /* भ */ {0x092d, FLAG_REGULAR, "bha"}, /* भ */
{0x092e, "ma"}, /* म */ {0x092e, FLAG_REGULAR, "ma"}, /* म */
{0x0935, "va"}, /* व */ {0x0935, FLAG_REGULAR, "va"}, /* व */
/* Codas */ /* Codas */
{0x0902, ""}, /* ं (anusvara) */ {0x0902, FLAG_REGULAR, ""}, /* ं (anusvara) */
{0x0903, ""}, /* (visarga) */ {0x0903, FLAG_REGULAR, ""}, /* (visarga) */
{0x093d, "'"}, /* ऽ (avagrada) */ {0x093d, FLAG_REGULAR, "'"}, /* ऽ (avagrada) */
/* Numbers */ /* Numbers */
{0x0966, "0"}, {0x0966, FLAG_REGULAR, "0"},
{0x0967, "1"}, {0x0967, FLAG_REGULAR, "1"},
{0x0968, "2"}, {0x0968, FLAG_REGULAR, "2"},
{0x0969, "3"}, {0x0969, FLAG_REGULAR, "3"},
{0x096a, "4"}, {0x096a, FLAG_REGULAR, "4"},
{0x096b, "5"}, {0x096b, FLAG_REGULAR, "5"},
{0x096c, "6"}, {0x096c, FLAG_REGULAR, "6"},
{0x096d, "7"}, {0x096d, FLAG_REGULAR, "7"},
{0x096e, "8"}, {0x096e, FLAG_REGULAR, "8"},
{0x096f, "9"}, {0x096f, FLAG_REGULAR, "9"},
{0, NULL} /* Diacritic modifiers */
}; {0x093e, FLAG_MODIFIER, "ā"},
{0x093f, FLAG_MODIFIER, "i"},
{0x0940, FLAG_MODIFIER, "ī"},
{0x0941, FLAG_MODIFIER, "u"},
{0x0942, FLAG_MODIFIER, "ū"},
{0x0943, FLAG_MODIFIER, ""},
{0x0944, FLAG_MODIFIER, ""},
{0x0962, FLAG_MODIFIER, ""},
{0x0963, FLAG_MODIFIER, ""},
{0x0947, FLAG_MODIFIER, "e"},
{0x094b, FLAG_MODIFIER, "o"},
{0x0948, FLAG_MODIFIER, "ai"},
{0x094d, FLAG_MODIFIER, ""},
static const struct transliteration_modifier table_modifiers[] = { {0, 0, NULL}
{0x093e, transliteration_modifier_aa},
{0x093f, transliteration_modifier_i},
{0x0940, transliteration_modifier_ii},
{0x0941, transliteration_modifier_u},
{0x0942, transliteration_modifier_uu},
{0x0943, transliteration_modifier_r},
{0x0944, transliteration_modifier_rr},
{0x0962, transliteration_modifier_l},
{0x0963, transliteration_modifier_ll},
{0x0947, transliteration_modifier_e},
{0x094b, transliteration_modifier_o},
{0x0948, transliteration_modifier_ai},
{0x094d, transliteration_modifier_virama},
{0, NULL}
}; };
struct transliteration_context *transliteration_context_iast_alloc() struct transliteration_context *transliteration_context_iast_alloc()
@ -178,8 +100,7 @@ struct transliteration_context *transliteration_context_iast_alloc()
struct transliteration_context *context; struct transliteration_context *context;
context = malloc(sizeof(*context)); context = malloc(sizeof(*context));
context->table_letters = table_letters; context->table = table;
context->table_modifiers = table_modifiers;
return context; return context;
} }

View file

@ -7,7 +7,7 @@
#include "syllable.h" #include "syllable.h"
#include "utf8.h" #include "utf8.h"
const struct transliteration_letter *find_letter_by_code(unsigned long c, static const struct transliteration_letter *find_letter_by_code(unsigned long c,
const struct transliteration_letter *table) const struct transliteration_letter *table)
{ {
const struct transliteration_letter *walk = table; const struct transliteration_letter *walk = table;
@ -24,21 +24,16 @@ const struct transliteration_letter *find_letter_by_code(unsigned long c,
return NULL; return NULL;
} }
const struct transliteration_modifier *find_modifier_by_code(unsigned long c, static void syllable_modify(struct syllable *syllable, const char *data)
const struct transliteration_modifier *table)
{ {
const struct transliteration_modifier *walk = table; char buffer[10];
if (c == 0) strcpy(buffer, syllable->data);
return NULL; buffer[strlen(buffer) - 1] = 0;
strcat(buffer, data);
while (walk->code != 0) { free(syllable->data);
if (c == walk->code) syllable->data = strdup(buffer);
return walk;
walk++;
}
return NULL;
} }
char *transliterate_devanagari_to_latin(const char *text, char *transliterate_devanagari_to_latin(const char *text,
@ -51,7 +46,6 @@ char *transliterate_devanagari_to_latin(const char *text,
unsigned long c; unsigned long c;
struct syllable *head, *tail; struct syllable *head, *tail;
const struct transliteration_letter *letter; const struct transliteration_letter *letter;
const struct transliteration_modifier *modifier;
head = syllable_alloc(""); head = syllable_alloc("");
tail = head; tail = head;
@ -60,15 +54,14 @@ char *transliterate_devanagari_to_latin(const char *text,
c = utf8_unpack_char(ptr); c = utf8_unpack_char(ptr);
ptr += utf8_char_length(c); ptr += utf8_char_length(c);
letter = find_letter_by_code(c, context->table_letters); letter = find_letter_by_code(c, context->table);
if (letter != NULL) { if (letter != NULL) {
tail = syllable_append(tail, letter->data);
continue;
}
modifier = find_modifier_by_code(c, context->table_modifiers); if (letter->flags & FLAG_REGULAR)
if (modifier != NULL) { tail = syllable_append(tail, letter->data);
modifier->modifier(tail); else if (letter->flags & FLAG_MODIFIER)
syllable_modify(tail, letter->data);
continue; continue;
} }

View file

@ -5,21 +5,18 @@
#include "syllable.h" #include "syllable.h"
/*
 * Bit flags stored in transliteration_letter.flags.
 *
 * The expansions are parenthesized so they stay a single operand wherever
 * they are used; a bare `1 << 1` would regroup next to operators such as
 * `==` or `/` (e.g. `flags == 1 << 1` parses as `(flags == 1) << 1`).
 * The unsigned literal matches the `unsigned int flags` field.
 */
#define FLAG_REGULAR	(1u << 0)	/* entry is appended as a new syllable */
#define FLAG_MODIFIER	(1u << 1)	/* entry rewrites the current syllable */
struct transliteration_letter { struct transliteration_letter {
unsigned long code; unsigned long code;
unsigned int flags;
const char *data; const char *data;
}; };
typedef void (*syllable_modification_t)(struct syllable *syllable);
struct transliteration_modifier {
unsigned long code;
syllable_modification_t modifier;
};
struct transliteration_context { struct transliteration_context {
const struct transliteration_letter *table_letters; const struct transliteration_letter *table;
const struct transliteration_modifier *table_modifiers;
}; };
char *transliterate_devanagari_to_latin(const char *text, char *transliterate_devanagari_to_latin(const char *text,