2018-04-25 19:30:49 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
|
2020-01-01 16:56:43 +01:00
|
|
|
#include "compat.h"
|
2018-04-25 16:13:34 +02:00
|
|
|
#include "transliteration.h"
|
2020-01-01 16:56:43 +01:00
|
|
|
#include "iast.h"
|
2018-04-25 16:13:34 +02:00
|
|
|
#include "utf8.h"
|
|
|
|
|
2020-01-01 17:52:21 +01:00
|
|
|
/*
 * Inherent vowel: appended after every consonant when producing Latin text,
 * and skipped after a consonant when reading Latin text.
 */
#define SCHWA_CHARACTER 'a'
|
2020-01-01 22:02:44 +01:00
|
|
|
/*
 * Code point emitted after a consonant that is followed neither by a vowel
 * sign nor by the schwa character in the Latin input.
 */
#define VIRAMA 0x094d
|
2020-01-01 17:52:21 +01:00
|
|
|
/* Number of bytes by which the output buffers are grown on each realloc. */
#define CHUNKSIZE 1024
|
|
|
|
|
|
|
|
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
|
|
|
unsigned int c)
|
|
|
|
{
|
|
|
|
while (table->code != 0) {
|
|
|
|
if (table->code == c)
|
|
|
|
return table;
|
|
|
|
table++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-01-01 16:56:43 +01:00
|
|
|
char *transliterate_devanagari_to_latin(const char *devanagari)
|
2018-04-25 16:13:34 +02:00
|
|
|
{
|
2020-01-01 22:02:44 +01:00
|
|
|
struct translit_letter *table, *letter;
|
2020-01-01 17:52:21 +01:00
|
|
|
unsigned int c, alloc = 0, done = 0, len;
|
|
|
|
const char *src = devanagari;
|
|
|
|
char *latin = NULL;
|
2018-04-25 16:13:34 +02:00
|
|
|
|
2020-01-01 16:56:43 +01:00
|
|
|
table = get_iast_transliteration_table();
|
2018-04-30 18:14:39 +02:00
|
|
|
|
2020-01-01 17:52:21 +01:00
|
|
|
while (1) {
|
|
|
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
|
|
|
latin = realloc(latin, alloc + CHUNKSIZE);
|
|
|
|
alloc += CHUNKSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
c = utf8_unpack_char(src);
|
|
|
|
len = utf8_char_length(c);
|
|
|
|
src += len;
|
|
|
|
|
|
|
|
letter = letter_by_code(table, c);
|
|
|
|
if (letter) {
|
|
|
|
switch (letter->type) {
|
|
|
|
case CONSONANT:
|
|
|
|
strcpy(latin + done, letter->data);
|
|
|
|
done += strlen(letter->data);
|
|
|
|
*(latin + done++) = SCHWA_CHARACTER;
|
|
|
|
break;
|
|
|
|
case VOWEL_SIGN:
|
|
|
|
if (done)
|
|
|
|
done--;
|
|
|
|
strcpy(latin + done, letter->data);
|
|
|
|
done += strlen(letter->data);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
strcpy(latin + done, letter->data);
|
|
|
|
done += strlen(letter->data);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
utf8_pack_char(latin + done, c);
|
|
|
|
done += len;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c == 0)
|
|
|
|
break;
|
|
|
|
}
|
2018-05-18 10:09:06 +02:00
|
|
|
|
2020-01-01 17:52:21 +01:00
|
|
|
return latin;
|
2018-04-25 16:13:34 +02:00
|
|
|
}
|
2020-01-01 22:02:44 +01:00
|
|
|
|
|
|
|
static struct translit_letter *letter_by_data(struct translit_letter *table,
|
|
|
|
const char *data)
|
|
|
|
{
|
|
|
|
while (table->code != 0) {
|
|
|
|
unsigned int len = strlen(table->data);
|
|
|
|
if (len && strncmp(table->data, data, len) == 0)
|
|
|
|
return table;
|
|
|
|
table++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct translit_letter *vowel_sign_by_data(struct translit_letter *table,
|
|
|
|
const char *data)
|
|
|
|
{
|
|
|
|
while (table->code != 0) {
|
|
|
|
unsigned int len = strlen(table->data);
|
|
|
|
if (len && strncmp(table->data, data, len) == 0 &&
|
|
|
|
table->type == VOWEL_SIGN)
|
|
|
|
return table;
|
|
|
|
table++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
char *transliterate_latin_to_devanagari(const char *latin)
|
|
|
|
{
|
|
|
|
struct translit_letter *table, *letter, *next;
|
|
|
|
unsigned int alloc = 0, done = 0, len;
|
|
|
|
const char *src = latin;
|
|
|
|
char *devanagari = NULL;
|
|
|
|
|
|
|
|
table = get_iast_transliteration_table();
|
|
|
|
|
|
|
|
while (*src) {
|
|
|
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
|
|
|
devanagari = realloc(devanagari, alloc + CHUNKSIZE);
|
|
|
|
alloc += CHUNKSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
letter = letter_by_data(table, src);
|
|
|
|
if (letter) {
|
|
|
|
utf8_pack_char(devanagari + done, letter->code);
|
|
|
|
len = utf8_char_length(letter->code);
|
|
|
|
done += len;
|
|
|
|
src += strlen(letter->data);
|
|
|
|
|
|
|
|
if (letter->type == VOWEL || letter->type == CODA)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
next = vowel_sign_by_data(table, src);
|
|
|
|
if (next) {
|
|
|
|
utf8_pack_char(devanagari + done, next->code);
|
|
|
|
done += utf8_char_length(next->code);
|
|
|
|
src += strlen(next->data);
|
|
|
|
} else {
|
|
|
|
if (*src == SCHWA_CHARACTER) {
|
|
|
|
src++;
|
|
|
|
} else {
|
|
|
|
if (letter->type == CONSONANT) {
|
|
|
|
utf8_pack_char(devanagari + done, VIRAMA);
|
|
|
|
done += utf8_char_length(VIRAMA);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
devanagari[done++] = *src++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (devanagari)
|
|
|
|
devanagari[done] = '\0';
|
|
|
|
|
|
|
|
return devanagari;
|
|
|
|
}
|