2018-04-25 19:30:49 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
|
2020-01-01 16:56:43 +01:00
|
|
|
#include "compat.h"
|
2018-04-25 16:13:34 +02:00
|
|
|
#include "transliteration.h"
|
2020-01-01 16:56:43 +01:00
|
|
|
#include "iast.h"
|
2018-04-25 16:13:34 +02:00
|
|
|
#include "utf8.h"
|
|
|
|
|
2020-01-01 17:52:21 +01:00
|
|
|
/*
 * Character written right after the text of every CONSONANT entry; a
 * following VOWEL_SIGN entry steps back over it before writing its own text.
 */
#define SCHWA_CHARACTER 'a'
|
|
|
|
/* Number of bytes by which the output buffer is enlarged on each growth step. */
#define CHUNKSIZE 1024
|
|
|
|
|
|
|
|
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
|
|
|
unsigned int c)
|
|
|
|
{
|
|
|
|
while (table->code != 0) {
|
|
|
|
if (table->code == c)
|
|
|
|
return table;
|
|
|
|
table++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-01-01 16:56:43 +01:00
|
|
|
char *transliterate_devanagari_to_latin(const char *devanagari)
|
2018-04-25 16:13:34 +02:00
|
|
|
{
|
2020-01-01 16:56:43 +01:00
|
|
|
struct translit_letter *table;
|
2020-01-01 17:52:21 +01:00
|
|
|
struct translit_letter *letter;
|
|
|
|
unsigned int c, alloc = 0, done = 0, len;
|
|
|
|
const char *src = devanagari;
|
|
|
|
char *latin = NULL;
|
2018-04-25 16:13:34 +02:00
|
|
|
|
2020-01-01 16:56:43 +01:00
|
|
|
table = get_iast_transliteration_table();
|
2018-04-30 18:14:39 +02:00
|
|
|
|
2020-01-01 17:52:21 +01:00
|
|
|
while (1) {
|
|
|
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
|
|
|
latin = realloc(latin, alloc + CHUNKSIZE);
|
|
|
|
alloc += CHUNKSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
c = utf8_unpack_char(src);
|
|
|
|
len = utf8_char_length(c);
|
|
|
|
src += len;
|
|
|
|
|
|
|
|
letter = letter_by_code(table, c);
|
|
|
|
if (letter) {
|
|
|
|
switch (letter->type) {
|
|
|
|
case CONSONANT:
|
|
|
|
strcpy(latin + done, letter->data);
|
|
|
|
done += strlen(letter->data);
|
|
|
|
*(latin + done++) = SCHWA_CHARACTER;
|
|
|
|
break;
|
|
|
|
case VOWEL_SIGN:
|
|
|
|
if (done)
|
|
|
|
done--;
|
|
|
|
strcpy(latin + done, letter->data);
|
|
|
|
done += strlen(letter->data);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
strcpy(latin + done, letter->data);
|
|
|
|
done += strlen(letter->data);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
utf8_pack_char(latin + done, c);
|
|
|
|
done += len;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c == 0)
|
|
|
|
break;
|
|
|
|
}
|
2018-05-18 10:09:06 +02:00
|
|
|
|
2020-01-01 17:52:21 +01:00
|
|
|
return latin;
|
2018-04-25 16:13:34 +02:00
|
|
|
}
|