transliteration: refactor modifiers
This commit is contained in:
parent
69057f0e32
commit
bd0b64f91d
3 changed files with 99 additions and 188 deletions
237
iast.c
237
iast.c
|
@ -2,175 +2,97 @@
|
|||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "iast.h"
|
||||
|
||||
static inline void transliteration_modifier_apply(struct syllable *syllable,
|
||||
const char *modified)
|
||||
{
|
||||
char buffer[10];
|
||||
|
||||
strcpy(buffer, syllable->data);
|
||||
buffer[strlen(buffer) - 1] = 0;
|
||||
strcat(buffer, modified);
|
||||
|
||||
free(syllable->data);
|
||||
syllable->data = strdup(buffer);
|
||||
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x093e: hands "ā" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_aa(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ā");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x093f: hands "i" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_i(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "i");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0940: hands "ī" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_ii(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ī");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0941: hands "u" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_u(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "u");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0942: hands "ū" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_uu(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ū");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0943: hands "ṛ" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_r(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ṛ");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0944: hands "ṝ" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_rr(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ṝ");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0962: hands "ḷ" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_l(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ḷ");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0963: hands "ḹ" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_ll(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ḹ");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0947: hands "e" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_e(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "e");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x094b: hands "o" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_o(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "o");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x0948: hands "ai" to
 * transliteration_modifier_apply() as the syllable's new ending.
 */
static void transliteration_modifier_ai(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "ai");
}
|
||||
|
||||
/*
 * Modifier callback registered for code 0x094d: hands the empty string to
 * transliteration_modifier_apply(), so nothing is appended after the
 * truncation performed there.
 */
static void transliteration_modifier_virama(struct syllable *syllable)
{
	transliteration_modifier_apply(syllable, "");
}
|
||||
#include "transliteration.h"
|
||||
|
||||
|
||||
static const struct transliteration_letter table_letters[] = {
|
||||
static const struct transliteration_letter table[] = {
|
||||
|
||||
/* Vowels */
|
||||
{0x0905, "a"}, /* अ */
|
||||
{0x0906, "ā"}, /* आ */
|
||||
{0x0907, "i"}, /* इ */
|
||||
{0x0908, "ī"}, /* ई */
|
||||
{0x0909, "u"}, /* उ */
|
||||
{0x090a, "ū"}, /* ऊ */
|
||||
{0x090b, "ṛ"}, /* ऋ */
|
||||
{0x0960, "ṝ"}, /* ॠ */
|
||||
{0x090c, "ḷ"}, /* ऌ */
|
||||
{0x0961, "ḹ"}, /* ॡ */
|
||||
{0x090f, "e"}, /* ए */
|
||||
{0x0910, "ai"}, /* ऐ */
|
||||
{0x0913, "o"}, /* ओ */
|
||||
{0x0914, "au"}, /* औ */
|
||||
{0x0905, FLAG_REGULAR, "a"}, /* अ */
|
||||
{0x0906, FLAG_REGULAR, "ā"}, /* आ */
|
||||
{0x0907, FLAG_REGULAR, "i"}, /* इ */
|
||||
{0x0908, FLAG_REGULAR, "ī"}, /* ई */
|
||||
{0x0909, FLAG_REGULAR, "u"}, /* उ */
|
||||
{0x090a, FLAG_REGULAR, "ū"}, /* ऊ */
|
||||
{0x090b, FLAG_REGULAR, "ṛ"}, /* ऋ */
|
||||
{0x0960, FLAG_REGULAR, "ṝ"}, /* ॠ */
|
||||
{0x090c, FLAG_REGULAR, "ḷ"}, /* ऌ */
|
||||
{0x0961, FLAG_REGULAR, "ḹ"}, /* ॡ */
|
||||
{0x090f, FLAG_REGULAR, "e"}, /* ए */
|
||||
{0x0910, FLAG_REGULAR, "ai"}, /* ऐ */
|
||||
{0x0913, FLAG_REGULAR, "o"}, /* ओ */
|
||||
{0x0914, FLAG_REGULAR, "au"}, /* औ */
|
||||
|
||||
/* Consonants */
|
||||
{0x0915, "ka"}, /* क */
|
||||
{0x0916, "kha"}, /* ख */
|
||||
{0x0917, "ga"}, /* ग */
|
||||
{0x0918, "gha"}, /* घ */
|
||||
{0x0919, "ṅa"}, /* ङ */
|
||||
{0x0939, "ha"}, /* ह */
|
||||
{0x091a, "ca"}, /* च */
|
||||
{0x091b, "cha"}, /* छ */
|
||||
{0x091c, "ja"}, /* ज */
|
||||
{0x091d, "jha"}, /* झ */
|
||||
{0x091e, "ña"}, /* ञ */
|
||||
{0x092f, "ya"}, /* य */
|
||||
{0x0936, "śa"}, /* श */
|
||||
{0x091F, "ṭa"}, /* ट */
|
||||
{0x0920, "ṭha"}, /* ठ */
|
||||
{0x0921, "ḍa"}, /* ड */
|
||||
{0x0922, "ḍha"}, /* ढ */
|
||||
{0x0923, "ṇa"}, /* ण */
|
||||
{0x0930, "ra"}, /* र */
|
||||
{0x0937, "ṣa"}, /* ष */
|
||||
{0x0924, "ta"}, /* त */
|
||||
{0x0925, "tha"}, /* थ */
|
||||
{0x0926, "da"}, /* द */
|
||||
{0x0927, "dha"}, /* ध */
|
||||
{0x0928, "na"}, /* न */
|
||||
{0x0932, "la"}, /* ल */
|
||||
{0x0938, "sa"}, /* स */
|
||||
{0x092a, "pa"}, /* प */
|
||||
{0x092b, "pha"}, /* फ */
|
||||
{0x092c, "ba"}, /* ब */
|
||||
{0x092d, "bha"}, /* भ */
|
||||
{0x092e, "ma"}, /* म */
|
||||
{0x0935, "va"}, /* व */
|
||||
{0x0915, FLAG_REGULAR, "ka"}, /* क */
|
||||
{0x0916, FLAG_REGULAR, "kha"}, /* ख */
|
||||
{0x0917, FLAG_REGULAR, "ga"}, /* ग */
|
||||
{0x0918, FLAG_REGULAR, "gha"}, /* घ */
|
||||
{0x0919, FLAG_REGULAR, "ṅa"}, /* ङ */
|
||||
{0x0939, FLAG_REGULAR, "ha"}, /* ह */
|
||||
{0x091a, FLAG_REGULAR, "ca"}, /* च */
|
||||
{0x091b, FLAG_REGULAR, "cha"}, /* छ */
|
||||
{0x091c, FLAG_REGULAR, "ja"}, /* ज */
|
||||
{0x091d, FLAG_REGULAR, "jha"}, /* झ */
|
||||
{0x091e, FLAG_REGULAR, "ña"}, /* ञ */
|
||||
{0x092f, FLAG_REGULAR, "ya"}, /* य */
|
||||
{0x0936, FLAG_REGULAR, "śa"}, /* श */
|
||||
{0x091F, FLAG_REGULAR, "ṭa"}, /* ट */
|
||||
{0x0920, FLAG_REGULAR, "ṭha"}, /* ठ */
|
||||
{0x0921, FLAG_REGULAR, "ḍa"}, /* ड */
|
||||
{0x0922, FLAG_REGULAR, "ḍha"}, /* ढ */
|
||||
{0x0923, FLAG_REGULAR, "ṇa"}, /* ण */
|
||||
{0x0930, FLAG_REGULAR, "ra"}, /* र */
|
||||
{0x0937, FLAG_REGULAR, "ṣa"}, /* ष */
|
||||
{0x0924, FLAG_REGULAR, "ta"}, /* त */
|
||||
{0x0925, FLAG_REGULAR, "tha"}, /* थ */
|
||||
{0x0926, FLAG_REGULAR, "da"}, /* द */
|
||||
{0x0927, FLAG_REGULAR, "dha"}, /* ध */
|
||||
{0x0928, FLAG_REGULAR, "na"}, /* न */
|
||||
{0x0932, FLAG_REGULAR, "la"}, /* ल */
|
||||
{0x0938, FLAG_REGULAR, "sa"}, /* स */
|
||||
{0x092a, FLAG_REGULAR, "pa"}, /* प */
|
||||
{0x092b, FLAG_REGULAR, "pha"}, /* फ */
|
||||
{0x092c, FLAG_REGULAR, "ba"}, /* ब */
|
||||
{0x092d, FLAG_REGULAR, "bha"}, /* भ */
|
||||
{0x092e, FLAG_REGULAR, "ma"}, /* म */
|
||||
{0x0935, FLAG_REGULAR, "va"}, /* व */
|
||||
|
||||
/* Codas */
|
||||
{0x0902, "ṃ"}, /* ं (anusvara) */
|
||||
{0x0903, "ḥ"}, /* ः (visarga) */
|
||||
{0x093d, "'"}, /* ऽ (avagraha) */
|
||||
{0x0902, FLAG_REGULAR, "ṃ"}, /* ं (anusvara) */
|
||||
{0x0903, FLAG_REGULAR, "ḥ"}, /* ः (visarga) */
|
||||
{0x093d, FLAG_REGULAR, "'"}, /* ऽ (avagraha) */
|
||||
|
||||
/* Numbers */
|
||||
{0x0966, "0"},
|
||||
{0x0967, "1"},
|
||||
{0x0968, "2"},
|
||||
{0x0969, "3"},
|
||||
{0x096a, "4"},
|
||||
{0x096b, "5"},
|
||||
{0x096c, "6"},
|
||||
{0x096d, "7"},
|
||||
{0x096e, "8"},
|
||||
{0x096f, "9"},
|
||||
{0x0966, FLAG_REGULAR, "0"},
|
||||
{0x0967, FLAG_REGULAR, "1"},
|
||||
{0x0968, FLAG_REGULAR, "2"},
|
||||
{0x0969, FLAG_REGULAR, "3"},
|
||||
{0x096a, FLAG_REGULAR, "4"},
|
||||
{0x096b, FLAG_REGULAR, "5"},
|
||||
{0x096c, FLAG_REGULAR, "6"},
|
||||
{0x096d, FLAG_REGULAR, "7"},
|
||||
{0x096e, FLAG_REGULAR, "8"},
|
||||
{0x096f, FLAG_REGULAR, "9"},
|
||||
|
||||
{0, NULL}
|
||||
};
|
||||
/* Diacritic modifiers */
|
||||
{0x093e, FLAG_MODIFIER, "ā"},
|
||||
{0x093f, FLAG_MODIFIER, "i"},
|
||||
{0x0940, FLAG_MODIFIER, "ī"},
|
||||
{0x0941, FLAG_MODIFIER, "u"},
|
||||
{0x0942, FLAG_MODIFIER, "ū"},
|
||||
{0x0943, FLAG_MODIFIER, "ṛ"},
|
||||
{0x0944, FLAG_MODIFIER, "ṝ"},
|
||||
{0x0962, FLAG_MODIFIER, "ḷ"},
|
||||
{0x0963, FLAG_MODIFIER, "ḹ"},
|
||||
{0x0947, FLAG_MODIFIER, "e"},
|
||||
{0x094b, FLAG_MODIFIER, "o"},
|
||||
{0x0948, FLAG_MODIFIER, "ai"},
|
||||
{0x094d, FLAG_MODIFIER, ""},
|
||||
|
||||
static const struct transliteration_modifier table_modifiers[] = {
|
||||
{0x093e, transliteration_modifier_aa},
|
||||
{0x093f, transliteration_modifier_i},
|
||||
{0x0940, transliteration_modifier_ii},
|
||||
{0x0941, transliteration_modifier_u},
|
||||
{0x0942, transliteration_modifier_uu},
|
||||
{0x0943, transliteration_modifier_r},
|
||||
{0x0944, transliteration_modifier_rr},
|
||||
{0x0962, transliteration_modifier_l},
|
||||
{0x0963, transliteration_modifier_ll},
|
||||
{0x0947, transliteration_modifier_e},
|
||||
{0x094b, transliteration_modifier_o},
|
||||
{0x0948, transliteration_modifier_ai},
|
||||
{0x094d, transliteration_modifier_virama},
|
||||
{0, NULL}
|
||||
{0, 0, NULL}
|
||||
};
|
||||
|
||||
struct transliteration_context *transliteration_context_iast_alloc()
|
||||
|
@ -178,8 +100,7 @@ struct transliteration_context *transliteration_context_iast_alloc()
|
|||
struct transliteration_context *context;
|
||||
|
||||
context = malloc(sizeof(*context));
|
||||
context->table_letters = table_letters;
|
||||
context->table_modifiers = table_modifiers;
|
||||
context->table = table;
|
||||
|
||||
return context;
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include "syllable.h"
|
||||
#include "utf8.h"
|
||||
|
||||
const struct transliteration_letter *find_letter_by_code(unsigned long c,
|
||||
static const struct transliteration_letter *find_letter_by_code(unsigned long c,
|
||||
const struct transliteration_letter *table)
|
||||
{
|
||||
const struct transliteration_letter *walk = table;
|
||||
|
@ -24,21 +24,16 @@ const struct transliteration_letter *find_letter_by_code(unsigned long c,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
const struct transliteration_modifier *find_modifier_by_code(unsigned long c,
|
||||
const struct transliteration_modifier *table)
|
||||
static void syllable_modify(struct syllable *syllable, const char *data)
|
||||
{
|
||||
const struct transliteration_modifier *walk = table;
|
||||
char buffer[10];
|
||||
|
||||
if (c == 0)
|
||||
return NULL;
|
||||
strcpy(buffer, syllable->data);
|
||||
buffer[strlen(buffer) - 1] = 0;
|
||||
strcat(buffer, data);
|
||||
|
||||
while (walk->code != 0) {
|
||||
if (c == walk->code)
|
||||
return walk;
|
||||
walk++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
free(syllable->data);
|
||||
syllable->data = strdup(buffer);
|
||||
}
|
||||
|
||||
char *transliterate_devanagari_to_latin(const char *text,
|
||||
|
@ -51,7 +46,6 @@ char *transliterate_devanagari_to_latin(const char *text,
|
|||
unsigned long c;
|
||||
struct syllable *head, *tail;
|
||||
const struct transliteration_letter *letter;
|
||||
const struct transliteration_modifier *modifier;
|
||||
|
||||
head = syllable_alloc("");
|
||||
tail = head;
|
||||
|
@ -60,15 +54,14 @@ char *transliterate_devanagari_to_latin(const char *text,
|
|||
c = utf8_unpack_char(ptr);
|
||||
ptr += utf8_char_length(c);
|
||||
|
||||
letter = find_letter_by_code(c, context->table_letters);
|
||||
letter = find_letter_by_code(c, context->table);
|
||||
if (letter != NULL) {
|
||||
tail = syllable_append(tail, letter->data);
|
||||
continue;
|
||||
}
|
||||
|
||||
modifier = find_modifier_by_code(c, context->table_modifiers);
|
||||
if (modifier != NULL) {
|
||||
modifier->modifier(tail);
|
||||
if (letter->flags & FLAG_REGULAR)
|
||||
tail = syllable_append(tail, letter->data);
|
||||
else if (letter->flags & FLAG_MODIFIER)
|
||||
syllable_modify(tail, letter->data);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -5,21 +5,18 @@
|
|||
|
||||
#include "syllable.h"
|
||||
|
||||
|
||||
/*
 * Bit flags stored in transliteration_letter.flags.
 *
 * The expansions are parenthesized so each macro behaves as a single
 * operand: without the parentheses, FLAG_MODIFIER + 1 would expand to
 * 1 << 1 + 1, i.e. 1 << 2.
 */
#define FLAG_REGULAR	(1 << 0)	/* entry's data is appended as a new syllable */
#define FLAG_MODIFIER	(1 << 1)	/* entry's data modifies the last syllable */
|
||||
|
||||
/*
 * One row of a transliteration table.
 *
 * Rows are written positionally as {code, flags, data}; a table ends with
 * a row whose code is 0.
 */
struct transliteration_letter {
	unsigned long code;	/* code value this row is keyed on, e.g. 0x0905 */
	unsigned int flags;	/* FLAG_REGULAR or FLAG_MODIFIER */
	const char *data;	/* Latin text associated with the code */
};
|
||||
|
||||
/*
 * Pointer to a function that takes a syllable and returns nothing; used as
 * the type of the `modifier` member of struct transliteration_modifier.
 */
typedef void (*syllable_modification_t)(struct syllable *syllable);
|
||||
|
||||
struct transliteration_modifier {
|
||||
unsigned long code;
|
||||
syllable_modification_t modifier;
|
||||
};
|
||||
|
||||
struct transliteration_context {
|
||||
const struct transliteration_letter *table_letters;
|
||||
const struct transliteration_modifier *table_modifiers;
|
||||
const struct transliteration_letter *table;
|
||||
};
|
||||
|
||||
char *transliterate_devanagari_to_latin(const char *text,
|
||||
|
|
Loading…
Add table
Reference in a new issue