sanskrit-iast/transliteration.c

150 lines
3.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#include "compat.h"
#include "transliteration.h"
#include "iast.h"
#include "utf8.h"
/*
 * Inherent vowel of a Devanagari consonant: appended after every consonant
 * when producing Latin text, and consumed after a consonant when producing
 * Devanagari text.
 */
#define SCHWA_CHARACTER 'a'
/*
 * Unicode code point U+094D (Devanagari sign virama), emitted after a
 * consonant that is followed by neither a vowel sign nor the schwa.
 */
#define VIRAMA 0x094d
/* Growth increment, in bytes, of the dynamically allocated output buffers. */
#define CHUNKSIZE 1024
/*
 * Find the table entry whose code point equals @c.
 *
 * @table is scanned linearly up to (not including) its terminating entry,
 * which is recognised by a zero code.  Returns the first matching entry or
 * NULL when no entry matches.
 */
static struct translit_letter *letter_by_code(struct translit_letter *table,
					      unsigned int c)
{
	struct translit_letter *entry;

	for (entry = table; entry->code != 0; entry++) {
		if (entry->code == c)
			return entry;
	}

	return NULL;
}
/*
 * Transliterate a Devanagari string into Latin (IAST) text.
 *
 * @devanagari: NUL-terminated input string, decoded one code point at a
 *              time with utf8_unpack_char().
 *
 * Each code point is looked up in the IAST table:
 *   - a consonant emits its romanization followed by the inherent schwa;
 *   - a vowel sign replaces the schwa left by the preceding consonant;
 *   - any other table entry emits its romanization unchanged;
 *   - a code point missing from the table is re-encoded as is.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if @devanagari is NULL or memory allocation fails.
 */
char *transliterate_devanagari_to_latin(const char *devanagari)
{
	struct translit_letter *table, *letter;
	size_t alloc = 0, done = 0, need, data_len;
	unsigned int c, len;
	const char *src = devanagari;
	char *latin = NULL, *grown;

	if (!devanagari)
		return NULL;

	table = get_iast_transliteration_table();

	for (;;) {
		c = utf8_unpack_char(src);
		len = utf8_char_length(c);
		src += len;

		/* Code 0 is the table terminator, so it never has an entry. */
		letter = c ? letter_by_code(table, c) : NULL;
		data_len = letter ? strlen(letter->data) : 0;

		/*
		 * Reserve room for the largest thing this iteration can write:
		 * a re-encoded code point or the letter's romanization, plus
		 * the schwa, plus the final NUL terminator.
		 */
		need = done + UNICODE_MAX_LENGTH + data_len + 2;
		while (alloc < need) {
			grown = realloc(latin, alloc + CHUNKSIZE);
			if (!grown) {
				/* realloc() left the old buffer intact. */
				free(latin);
				return NULL;
			}
			latin = grown;
			alloc += CHUNKSIZE;
		}

		if (c == 0)
			break;

		if (!letter) {
			/* Unknown code point: pass it through unchanged. */
			utf8_pack_char(latin + done, c);
			done += len;
			continue;
		}

		/*
		 * A vowel sign overrides the inherent schwa of the preceding
		 * consonant.  Only drop the previous byte when it really is
		 * that schwa, so other output is never truncated mid-character.
		 */
		if (letter->type == VOWEL_SIGN && done &&
		    latin[done - 1] == SCHWA_CHARACTER)
			done--;

		memcpy(latin + done, letter->data, data_len);
		done += data_len;

		if (letter->type == CONSONANT)
			latin[done++] = SCHWA_CHARACTER;
	}

	latin[done] = '\0';
	return latin;
}
/*
 * Find the table entry whose romanization is the longest prefix of @data.
 *
 * Entries with an empty romanization are never returned.  Picking the
 * longest match (instead of the first one) keeps the result independent of
 * table order when one romanization is a prefix of another, e.g. "k" and
 * "kh".  Among equally long matches the earliest table entry wins, which is
 * what a first-match scan would have returned.
 *
 * Returns the matching entry, or NULL when no romanization prefixes @data.
 */
static struct translit_letter *letter_by_data(struct translit_letter *table,
					      const char *data)
{
	struct translit_letter *best = NULL;
	size_t best_len = 0;

	for (; table->code != 0; table++) {
		size_t len = strlen(table->data);

		/* len > best_len also rejects empty romanizations. */
		if (len > best_len && strncmp(table->data, data, len) == 0) {
			best = table;
			best_len = len;
		}
	}

	return best;
}
/*
 * Find the VOWEL_SIGN table entry whose romanization is the longest prefix
 * of @data.
 *
 * Entries of other types and entries with an empty romanization are
 * ignored.  The longest match is chosen so the result does not depend on
 * table order when one vowel-sign romanization is a prefix of another;
 * among equally long matches the earliest table entry wins.
 *
 * Returns the matching entry, or NULL when no vowel sign prefixes @data.
 */
static struct translit_letter *vowel_sign_by_data(struct translit_letter *table,
						  const char *data)
{
	struct translit_letter *best = NULL;
	size_t best_len = 0;

	for (; table->code != 0; table++) {
		size_t len;

		if (table->type != VOWEL_SIGN)
			continue;

		len = strlen(table->data);
		/* len > best_len also rejects empty romanizations. */
		if (len > best_len && strncmp(table->data, data, len) == 0) {
			best = table;
			best_len = len;
		}
	}

	return best;
}
/*
 * Transliterate a Latin (IAST) string into Devanagari.
 *
 * @latin: NUL-terminated input string.
 *
 * At each position the input is matched against the romanizations of the
 * IAST table:
 *   - a vowel or coda emits its code point and nothing else;
 *   - any other letter emits its code point and is then followed by either
 *     a vowel sign (when one matches next), nothing (when the inherent
 *     schwa follows and is consumed), or a virama (consonants only);
 *   - a byte that matches no table entry is copied through unchanged.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if @latin is NULL or memory allocation fails.  An empty
 * input yields an allocated empty string, so NULL always means failure.
 */
char *transliterate_latin_to_devanagari(const char *latin)
{
	struct translit_letter *table, *letter, *next;
	size_t alloc = 0, done = 0;
	const char *src = latin;
	char *devanagari = NULL, *grown;

	if (!latin)
		return NULL;

	table = get_iast_transliteration_table();

	for (;;) {
		/*
		 * One iteration writes at most two code points (a letter plus
		 * a vowel sign or virama); keep one extra byte for the NUL
		 * terminator stored after the loop.
		 */
		while (alloc < done + 2 * UNICODE_MAX_LENGTH + 1) {
			grown = realloc(devanagari, alloc + CHUNKSIZE);
			if (!grown) {
				/* realloc() left the old buffer intact. */
				free(devanagari);
				return NULL;
			}
			devanagari = grown;
			alloc += CHUNKSIZE;
		}

		if (!*src)
			break;

		letter = letter_by_data(table, src);
		if (!letter) {
			/* Not a known romanization: copy the byte verbatim. */
			devanagari[done++] = *src++;
			continue;
		}

		utf8_pack_char(devanagari + done, letter->code);
		done += utf8_char_length(letter->code);
		src += strlen(letter->data);

		/* Vowels and codas take no vowel sign, schwa or virama. */
		if (letter->type == VOWEL || letter->type == CODA)
			continue;

		next = vowel_sign_by_data(table, src);
		if (next) {
			utf8_pack_char(devanagari + done, next->code);
			done += utf8_char_length(next->code);
			src += strlen(next->data);
		} else if (*src == SCHWA_CHARACTER) {
			/* The inherent vowel is implicit in Devanagari. */
			src++;
		} else if (letter->type == CONSONANT) {
			/* Bare consonant: suppress the inherent vowel. */
			utf8_pack_char(devanagari + done, VIRAMA);
			done += utf8_char_length(VIRAMA);
		}
	}

	devanagari[done] = '\0';
	return devanagari;
}