implement reverse transliteration (latin -> devanagari)
This commit is contained in:
parent
f4d545f4d1
commit
412be221a7
4 changed files with 108 additions and 6 deletions
6
iast.c
6
iast.c
|
@ -6,6 +6,9 @@
|
|||
|
||||
static struct translit_letter table[] = {
|
||||
|
||||
/* Special characters */
|
||||
{0x0950, SPECIAL, "aum"}, /* aum */
|
||||
|
||||
/* Vowels */
|
||||
{0x0910, VOWEL, "ai"}, /* 01 */
|
||||
{0x0914, VOWEL, "au"}, /* 02 */
|
||||
|
@ -62,9 +65,6 @@ static struct translit_letter table[] = {
|
|||
{0x0903, CODA, "\u1e25"}, /* visarga (.h) */
|
||||
{0x093d, CODA, "'"}, /* avagraha (') */
|
||||
|
||||
/* Special characters */
|
||||
{0x0950, SPECIAL, "aum"}, /* aum */
|
||||
|
||||
/* Numbers */
|
||||
{0x0966, NUMBER, "0"},
|
||||
{0x0967, NUMBER, "1"},
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "test.h"
|
||||
#include "translit.h"
|
||||
#include "../transliteration.h"
|
||||
|
||||
|
||||
START_TEST(test_translit_devanagari_to_latin)
|
||||
{
|
||||
char *latin;
|
||||
|
@ -28,7 +28,30 @@ START_TEST(test_translit_devanagari_to_latin)
|
|||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_translit_latin_to_devanagari)
|
||||
{
|
||||
char *devanagari;
|
||||
|
||||
devanagari = transliterate_latin_to_devanagari("saṃskṛtam");
|
||||
ck_assert_str_eq("संस्कृतम्", devanagari);
|
||||
free(devanagari);
|
||||
|
||||
devanagari = transliterate_latin_to_devanagari("bhagavadgītā");
|
||||
ck_assert_str_eq("भगवद्गीता", devanagari);
|
||||
free(devanagari);
|
||||
|
||||
devanagari = transliterate_latin_to_devanagari("āryāvarta");
|
||||
ck_assert_str_eq("आर्यावर्त", devanagari);
|
||||
free(devanagari);
|
||||
|
||||
devanagari = transliterate_latin_to_devanagari("mahābhāratam");
|
||||
ck_assert_str_eq("महाभारतम्", devanagari);
|
||||
free(devanagari);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/*
 * Adds the transliteration tests to the given test case: first the
 * Devanagari-to-Latin test, then the Latin-to-Devanagari test.
 */
void register_translit_tests(TCase *test_case)
|
||||
{
|
||||
tcase_add_test(test_case, test_translit_devanagari_to_latin);
|
||||
tcase_add_test(test_case, test_translit_latin_to_devanagari);
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "utf8.h"
|
||||
|
||||
#define SCHWA_CHARACTER 'a'
|
||||
#define VIRAMA 0x094d
|
||||
#define CHUNKSIZE 1024
|
||||
|
||||
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
||||
|
@ -22,8 +23,7 @@ static struct translit_letter *letter_by_code(struct translit_letter *table,
|
|||
|
||||
char *transliterate_devanagari_to_latin(const char *devanagari)
|
||||
{
|
||||
struct translit_letter *table;
|
||||
struct translit_letter *letter;
|
||||
struct translit_letter *table, *letter;
|
||||
unsigned int c, alloc = 0, done = 0, len;
|
||||
const char *src = devanagari;
|
||||
char *latin = NULL;
|
||||
|
@ -70,3 +70,81 @@ char *transliterate_devanagari_to_latin(const char *devanagari)
|
|||
|
||||
return latin;
|
||||
}
|
||||
|
||||
static struct translit_letter *letter_by_data(struct translit_letter *table,
|
||||
const char *data)
|
||||
{
|
||||
while (table->code != 0) {
|
||||
unsigned int len = strlen(table->data);
|
||||
if (len && strncmp(table->data, data, len) == 0)
|
||||
return table;
|
||||
table++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct translit_letter *vowel_sign_by_data(struct translit_letter *table,
|
||||
const char *data)
|
||||
{
|
||||
while (table->code != 0) {
|
||||
unsigned int len = strlen(table->data);
|
||||
if (len && strncmp(table->data, data, len) == 0 &&
|
||||
table->type == VOWEL_SIGN)
|
||||
return table;
|
||||
table++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *transliterate_latin_to_devanagari(const char *latin)
|
||||
{
|
||||
struct translit_letter *table, *letter, *next;
|
||||
unsigned int alloc = 0, done = 0, len;
|
||||
const char *src = latin;
|
||||
char *devanagari = NULL;
|
||||
|
||||
table = get_iast_transliteration_table();
|
||||
|
||||
while (*src) {
|
||||
if (alloc < done + UNICODE_MAX_LENGTH) {
|
||||
devanagari = realloc(devanagari, alloc + CHUNKSIZE);
|
||||
alloc += CHUNKSIZE;
|
||||
}
|
||||
|
||||
letter = letter_by_data(table, src);
|
||||
if (letter) {
|
||||
utf8_pack_char(devanagari + done, letter->code);
|
||||
len = utf8_char_length(letter->code);
|
||||
done += len;
|
||||
src += strlen(letter->data);
|
||||
|
||||
if (letter->type == VOWEL || letter->type == CODA)
|
||||
continue;
|
||||
|
||||
next = vowel_sign_by_data(table, src);
|
||||
if (next) {
|
||||
utf8_pack_char(devanagari + done, next->code);
|
||||
done += utf8_char_length(next->code);
|
||||
src += strlen(next->data);
|
||||
} else {
|
||||
if (*src == SCHWA_CHARACTER) {
|
||||
src++;
|
||||
} else {
|
||||
if (letter->type == CONSONANT) {
|
||||
utf8_pack_char(devanagari + done, VIRAMA);
|
||||
done += utf8_char_length(VIRAMA);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
devanagari[done++] = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
if (devanagari)
|
||||
devanagari[done] = '\0';
|
||||
|
||||
return devanagari;
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ struct translit_context {
|
|||
};
|
||||
|
||||
char *transliterate_devanagari_to_latin(const char *text);
|
||||
char *transliterate_latin_to_devanagari(const char *text);
|
||||
|
||||
static inline int is_devanagari(unsigned int code)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue