transliteration: refactor modifiers

This commit is contained in:
Vlasta Vesely 2018-04-27 18:58:07 +02:00
parent 69057f0e32
commit bd0b64f91d
3 changed files with 99 additions and 188 deletions

237
iast.c
View file

@ -2,175 +2,97 @@
#include <stdlib.h>
#include <string.h>
#include "iast.h"
/*
 * Replace the last byte of syllable->data with the string `modified`.
 *
 * The result is built in a freshly allocated buffer sized to fit, so
 * neither a long syllable nor a long replacement can overrun a fixed
 * stack buffer.  An empty syllable has no trailing byte to drop; in that
 * case `modified` simply becomes the new contents.
 *
 * syllable->data is released with free() and replaced by the new heap
 * string.  If any argument is NULL or the allocation fails, the syllable
 * is left untouched.
 */
static inline void transliteration_modifier_apply(struct syllable *syllable,
		const char *modified)
{
	size_t keep_len;
	size_t mod_len;
	char *result;

	if (syllable == NULL || syllable->data == NULL || modified == NULL)
		return;

	keep_len = strlen(syllable->data);
	if (keep_len > 0)
		keep_len--;	/* drop the trailing byte */
	mod_len = strlen(modified);

	result = malloc(keep_len + mod_len + 1);
	if (result == NULL)
		return;

	memcpy(result, syllable->data, keep_len);
	/* copies the terminating NUL of `modified` as well */
	memcpy(result + keep_len, modified, mod_len + 1);

	free(syllable->data);
	syllable->data = result;
}
/*
 * Modifier callbacks, one per dependent sign listed in table_modifiers.
 * Each replaces the trailing byte of the given syllable with the Latin
 * spelling of its sign via transliteration_modifier_apply().
 */

/* U+093E -> "ā" */
static void transliteration_modifier_aa(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ā");
}

/* U+093F -> "i" */
static void transliteration_modifier_i(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "i");
}

/* U+0940 -> "ī" */
static void transliteration_modifier_ii(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ī");
}

/* U+0941 -> "u" */
static void transliteration_modifier_u(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "u");
}

/* U+0942 -> "ū" */
static void transliteration_modifier_uu(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ū");
}

/* U+0943 (vocalic r) -> "ṛ"; an empty string here would drop the vowel */
static void transliteration_modifier_r(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ṛ");
}

/* U+0944 (vocalic rr) -> "ṝ" */
static void transliteration_modifier_rr(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ṝ");
}

/* U+0962 (vocalic l) -> "ḷ" */
static void transliteration_modifier_l(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ḷ");
}

/* U+0963 (vocalic ll) -> "ḹ" */
static void transliteration_modifier_ll(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ḹ");
}

/* U+0947 -> "e" */
static void transliteration_modifier_e(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "e");
}

/* U+094B -> "o" */
static void transliteration_modifier_o(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "o");
}

/* U+0948 -> "ai" */
static void transliteration_modifier_ai(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ai");
}

/* U+094D (virama): intentionally empty, it only strips the trailing byte */
static void transliteration_modifier_virama(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "");
}
#include "transliteration.h"
/*
 * Devanagari -> IAST mapping tables.  Every row pairs a Unicode code
 * point with its Latin spelling; a row whose code is 0 ends a table.
 */
static const struct transliteration_letter table_letters[] = {
static const struct transliteration_letter table[] = {
	/* Vowels */
	{0x0905, "a"}, /* अ */
	{0x0906, "ā"}, /* आ */
	{0x0907, "i"}, /* इ */
	{0x0908, "ī"}, /* ई */
	{0x0909, "u"}, /* उ */
	{0x090a, "ū"}, /* ऊ */
	{0x090b, "ṛ"}, /* ऋ */
	{0x0960, "ṝ"}, /* ॠ */
	{0x090c, "ḷ"}, /* ऌ */
	{0x0961, "ḹ"}, /* ॡ */
	{0x090f, "e"}, /* ए */
	{0x0910, "ai"}, /* ऐ */
	{0x0913, "o"}, /* ओ */
	{0x0914, "au"}, /* औ */
	{0x0905, FLAG_REGULAR, "a"}, /* अ */
	{0x0906, FLAG_REGULAR, "ā"}, /* आ */
	{0x0907, FLAG_REGULAR, "i"}, /* इ */
	{0x0908, FLAG_REGULAR, "ī"}, /* ई */
	{0x0909, FLAG_REGULAR, "u"}, /* उ */
	{0x090a, FLAG_REGULAR, "ū"}, /* ऊ */
	{0x090b, FLAG_REGULAR, "ṛ"}, /* ऋ */
	{0x0960, FLAG_REGULAR, "ṝ"}, /* ॠ */
	{0x090c, FLAG_REGULAR, "ḷ"}, /* ऌ */
	{0x0961, FLAG_REGULAR, "ḹ"}, /* ॡ */
	{0x090f, FLAG_REGULAR, "e"}, /* ए */
	{0x0910, FLAG_REGULAR, "ai"}, /* ऐ */
	{0x0913, FLAG_REGULAR, "o"}, /* ओ */
	{0x0914, FLAG_REGULAR, "au"}, /* औ */
	/* Consonants */
	{0x0915, "ka"}, /* क */
	{0x0916, "kha"}, /* ख */
	{0x0917, "ga"}, /* ग */
	{0x0918, "gha"}, /* घ */
	{0x0919, "ṅa"}, /* ङ */
	{0x0939, "ha"}, /* ह */
	{0x091a, "ca"}, /* च */
	{0x091b, "cha"}, /* छ */
	{0x091c, "ja"}, /* ज */
	{0x091d, "jha"}, /* झ */
	{0x091e, "ña"}, /* ञ */
	{0x092f, "ya"}, /* य */
	{0x0936, "śa"}, /* श */
	{0x091F, "ṭa"}, /* ट */
	{0x0920, "ṭha"}, /* ठ */
	{0x0921, "ḍa"}, /* ड */
	{0x0922, "ḍha"}, /* ढ */
	{0x0923, "ṇa"}, /* ण */
	{0x0930, "ra"}, /* र */
	{0x0937, "ṣa"}, /* ष */
	{0x0924, "ta"}, /* त */
	{0x0925, "tha"}, /* थ */
	{0x0926, "da"}, /* द */
	{0x0927, "dha"}, /* ध */
	{0x0928, "na"}, /* न */
	{0x0932, "la"}, /* ल */
	{0x0938, "sa"}, /* स */
	{0x092a, "pa"}, /* प */
	{0x092b, "pha"}, /* फ */
	{0x092c, "ba"}, /* ब */
	{0x092d, "bha"}, /* भ */
	{0x092e, "ma"}, /* म */
	{0x0935, "va"}, /* व */
	{0x0915, FLAG_REGULAR, "ka"}, /* क */
	{0x0916, FLAG_REGULAR, "kha"}, /* ख */
	{0x0917, FLAG_REGULAR, "ga"}, /* ग */
	{0x0918, FLAG_REGULAR, "gha"}, /* घ */
	{0x0919, FLAG_REGULAR, "ṅa"}, /* ङ */
	{0x0939, FLAG_REGULAR, "ha"}, /* ह */
	{0x091a, FLAG_REGULAR, "ca"}, /* च */
	{0x091b, FLAG_REGULAR, "cha"}, /* छ */
	{0x091c, FLAG_REGULAR, "ja"}, /* ज */
	{0x091d, FLAG_REGULAR, "jha"}, /* झ */
	{0x091e, FLAG_REGULAR, "ña"}, /* ञ */
	{0x092f, FLAG_REGULAR, "ya"}, /* य */
	{0x0936, FLAG_REGULAR, "śa"}, /* श */
	{0x091F, FLAG_REGULAR, "ṭa"}, /* ट */
	{0x0920, FLAG_REGULAR, "ṭha"}, /* ठ */
	{0x0921, FLAG_REGULAR, "ḍa"}, /* ड */
	{0x0922, FLAG_REGULAR, "ḍha"}, /* ढ */
	{0x0923, FLAG_REGULAR, "ṇa"}, /* ण */
	{0x0930, FLAG_REGULAR, "ra"}, /* र */
	{0x0937, FLAG_REGULAR, "ṣa"}, /* ष */
	{0x0924, FLAG_REGULAR, "ta"}, /* त */
	{0x0925, FLAG_REGULAR, "tha"}, /* थ */
	{0x0926, FLAG_REGULAR, "da"}, /* द */
	{0x0927, FLAG_REGULAR, "dha"}, /* ध */
	{0x0928, FLAG_REGULAR, "na"}, /* न */
	{0x0932, FLAG_REGULAR, "la"}, /* ल */
	{0x0938, FLAG_REGULAR, "sa"}, /* स */
	{0x092a, FLAG_REGULAR, "pa"}, /* प */
	{0x092b, FLAG_REGULAR, "pha"}, /* फ */
	{0x092c, FLAG_REGULAR, "ba"}, /* ब */
	{0x092d, FLAG_REGULAR, "bha"}, /* भ */
	{0x092e, FLAG_REGULAR, "ma"}, /* म */
	{0x0935, FLAG_REGULAR, "va"}, /* व */
	/* Codas */
	{0x0902, "ṃ"}, /* ं (anusvara) */
	{0x0903, "ḥ"}, /* ः (visarga) */
	{0x093d, "'"}, /* ऽ (avagraha) */
	{0x0902, FLAG_REGULAR, "ṃ"}, /* ं (anusvara) */
	{0x0903, FLAG_REGULAR, "ḥ"}, /* ः (visarga) */
	{0x093d, FLAG_REGULAR, "'"}, /* ऽ (avagraha) */
	/* Numbers */
	{0x0966, "0"},
	{0x0967, "1"},
	{0x0968, "2"},
	{0x0969, "3"},
	{0x096a, "4"},
	{0x096b, "5"},
	{0x096c, "6"},
	{0x096d, "7"},
	{0x096e, "8"},
	{0x096f, "9"},
	{0x0966, FLAG_REGULAR, "0"},
	{0x0967, FLAG_REGULAR, "1"},
	{0x0968, FLAG_REGULAR, "2"},
	{0x0969, FLAG_REGULAR, "3"},
	{0x096a, FLAG_REGULAR, "4"},
	{0x096b, FLAG_REGULAR, "5"},
	{0x096c, FLAG_REGULAR, "6"},
	{0x096d, FLAG_REGULAR, "7"},
	{0x096e, FLAG_REGULAR, "8"},
	{0x096f, FLAG_REGULAR, "9"},
	{0, NULL}
};
	/* Diacritic modifiers */
	{0x093e, FLAG_MODIFIER, "ā"},
	{0x093f, FLAG_MODIFIER, "i"},
	{0x0940, FLAG_MODIFIER, "ī"},
	{0x0941, FLAG_MODIFIER, "u"},
	{0x0942, FLAG_MODIFIER, "ū"},
	{0x0943, FLAG_MODIFIER, "ṛ"}, /* vocalic r sign */
	{0x0944, FLAG_MODIFIER, "ṝ"}, /* vocalic rr sign */
	{0x0962, FLAG_MODIFIER, "ḷ"}, /* vocalic l sign */
	{0x0963, FLAG_MODIFIER, "ḹ"}, /* vocalic ll sign */
	{0x0947, FLAG_MODIFIER, "e"},
	{0x094b, FLAG_MODIFIER, "o"},
	{0x0948, FLAG_MODIFIER, "ai"},
	{0x094d, FLAG_MODIFIER, ""}, /* virama: intentionally empty */
static const struct transliteration_modifier table_modifiers[] = {
	{0x093e, transliteration_modifier_aa},
	{0x093f, transliteration_modifier_i},
	{0x0940, transliteration_modifier_ii},
	{0x0941, transliteration_modifier_u},
	{0x0942, transliteration_modifier_uu},
	{0x0943, transliteration_modifier_r},
	{0x0944, transliteration_modifier_rr},
	{0x0962, transliteration_modifier_l},
	{0x0963, transliteration_modifier_ll},
	{0x0947, transliteration_modifier_e},
	{0x094b, transliteration_modifier_o},
	{0x0948, transliteration_modifier_ai},
	{0x094d, transliteration_modifier_virama},
	{0, NULL}
	{0, 0, NULL}
};
struct transliteration_context *transliteration_context_iast_alloc()
@ -178,8 +100,7 @@ struct transliteration_context *transliteration_context_iast_alloc()
struct transliteration_context *context;
context = malloc(sizeof(*context));
context->table_letters = table_letters;
context->table_modifiers = table_modifiers;
context->table = table;
return context;
}

View file

@ -7,7 +7,7 @@
#include "syllable.h"
#include "utf8.h"
const struct transliteration_letter *find_letter_by_code(unsigned long c,
static const struct transliteration_letter *find_letter_by_code(unsigned long c,
const struct transliteration_letter *table)
{
const struct transliteration_letter *walk = table;
@ -24,21 +24,16 @@ const struct transliteration_letter *find_letter_by_code(unsigned long c,
return NULL;
}
const struct transliteration_modifier *find_modifier_by_code(unsigned long c,
const struct transliteration_modifier *table)
static void syllable_modify(struct syllable *syllable, const char *data)
{
const struct transliteration_modifier *walk = table;
char buffer[10];
if (c == 0)
return NULL;
strcpy(buffer, syllable->data);
buffer[strlen(buffer) - 1] = 0;
strcat(buffer, data);
while (walk->code != 0) {
if (c == walk->code)
return walk;
walk++;
}
return NULL;
free(syllable->data);
syllable->data = strdup(buffer);
}
char *transliterate_devanagari_to_latin(const char *text,
@ -51,7 +46,6 @@ char *transliterate_devanagari_to_latin(const char *text,
unsigned long c;
struct syllable *head, *tail;
const struct transliteration_letter *letter;
const struct transliteration_modifier *modifier;
head = syllable_alloc("");
tail = head;
@ -60,15 +54,14 @@ char *transliterate_devanagari_to_latin(const char *text,
c = utf8_unpack_char(ptr);
ptr += utf8_char_length(c);
letter = find_letter_by_code(c, context->table_letters);
letter = find_letter_by_code(c, context->table);
if (letter != NULL) {
tail = syllable_append(tail, letter->data);
continue;
}
modifier = find_modifier_by_code(c, context->table_modifiers);
if (modifier != NULL) {
modifier->modifier(tail);
if (letter->flags & FLAG_REGULAR)
tail = syllable_append(tail, letter->data);
else if (letter->flags & FLAG_MODIFIER)
syllable_modify(tail, letter->data);
continue;
}

View file

@ -5,21 +5,18 @@
#include "syllable.h"
/*
 * Bit flags for the `flags` member of struct transliteration_letter.
 * The expansions are parenthesized so that surrounding operators
 * (e.g. `FLAG_MODIFIER + 1`) cannot regroup the shift.
 */
#define FLAG_REGULAR (1u << 0)	/* entry is appended as a new syllable */
#define FLAG_MODIFIER (1u << 1)	/* entry rewrites the current syllable */
/*
 * One row of a transliteration table.
 * Tables are arrays of these rows; the visible tables end with a row
 * whose code is 0 and whose data is NULL.
 */
struct transliteration_letter {
/* code point this row matches (compared against the decoded input character) */
unsigned long code;
/* FLAG_REGULAR or FLAG_MODIFIER; selects append vs. modify in the caller */
unsigned int flags;
/* Latin text for this code point; may be an empty string */
const char *data;
};
/* Callback that rewrites the given syllable in place. */
typedef void (*syllable_modification_t)(struct syllable *syllable);
/*
 * One row of a modifier table: a code point and the callback to run on
 * the current syllable when that code point is read.
 * A row with code 0 ends the table.
 */
struct transliteration_modifier {
/* code point this row matches */
unsigned long code;
/* invoked by the caller as modifier(tail) when the code matches */
syllable_modification_t modifier;
};
/*
 * Lookup tables used by transliterate_devanagari_to_latin().
 * NOTE(review): this view shows fields from both sides of the refactor;
 * table_letters/table_modifiers look like the pre-refactor pair and
 * `table` their merged replacement — confirm against the real header.
 */
struct transliteration_context {
/* letter rows searched with find_letter_by_code() */
const struct transliteration_letter *table_letters;
/* modifier rows searched with find_modifier_by_code() */
const struct transliteration_modifier *table_modifiers;
/* combined rows (letters and modifiers, told apart by flags) */
const struct transliteration_letter *table;
};
char *transliterate_devanagari_to_latin(const char *text,