handle ‘aum’ as ॐ (the AUM sign) only if followed by whitespace or the end of the string

This commit is contained in:
Vlasta Vesely 2023-03-18 18:34:50 +01:00
parent 119ae59457
commit 755f75857e
No known key found for this signature in database
GPG key ID: EB0E649DC0DFCC22
3 changed files with 16 additions and 0 deletions

View file

@ -8,6 +8,7 @@
#include <fcntl.h> #include <fcntl.h>
#include <stdarg.h> #include <stdarg.h>
#include <getopt.h> #include <getopt.h>
#include <ctype.h>
#include <errno.h> #include <errno.h>
#define ARRAY_SIZE(a) sizeof(a) / sizeof(*a) #define ARRAY_SIZE(a) sizeof(a) / sizeof(*a)

View file

@ -67,6 +67,11 @@ START_TEST(test_translit_lla_sylable)
} }
END_TEST END_TEST
START_TEST(test_translit_aum)
{
test_translit("ॐ औम औमे तौमे ॐ", "aum auma aume taume aum");
}
START_TEST(test_translit_candrabindu) START_TEST(test_translit_candrabindu)
{ {
test_translit("तान्यजत्राँ", "tānyajatrām̐"); test_translit("तान्यजत्राँ", "tānyajatrām̐");
@ -84,6 +89,7 @@ void register_translit_tests(TCase *test_case)
tcase_add_test(test_case, test_translit_words); tcase_add_test(test_case, test_translit_words);
tcase_add_test(test_case, test_translit_vedic); tcase_add_test(test_case, test_translit_vedic);
tcase_add_test(test_case, test_translit_lla_sylable); tcase_add_test(test_case, test_translit_lla_sylable);
tcase_add_test(test_case, test_translit_aum);
tcase_add_test(test_case, test_translit_candrabindu); tcase_add_test(test_case, test_translit_candrabindu);
tcase_add_test(test_case, test_translit_zero_width_joiner); tcase_add_test(test_case, test_translit_zero_width_joiner);
} }

View file

@ -8,6 +8,7 @@
#define SCHWA_CHARACTER 'a' #define SCHWA_CHARACTER 'a'
#define ZERO_WIDTH_JOINER 0x200d #define ZERO_WIDTH_JOINER 0x200d
#define VIRAMA 0x094d #define VIRAMA 0x094d
#define AUM 0x0950
#define CHUNKSIZE 1024 #define CHUNKSIZE 1024
static struct translit_letter table[] = { static struct translit_letter table[] = {
@ -259,6 +260,14 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret)
letter = letter_by_code(0x0933); /* .la */ letter = letter_by_code(0x0933); /* .la */
} }
} }
if (letter->code == AUM) {
/* if aum is followed by something other than
 * whitespace (or the end of the string), it is au + m */
if (!isspace(src[3]) && src[3] != '\0') {
letter = letter_by_code(0x0914);
}
}
encode_consonant: encode_consonant:
/* A consonant or an initial vowel */ /* A consonant or an initial vowel */
utf8_pack_char(devanagari + done, letter->code); utf8_pack_char(devanagari + done, letter->code);