handle ‘aum’ as aum only if followed by a whitespace
This commit is contained in:
parent
119ae59457
commit
755f75857e
3 changed files with 16 additions and 0 deletions
1
compat.h
1
compat.h
|
@ -8,6 +8,7 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
|
#include <ctype.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
/*
 * Number of elements in the array `a`.
 *
 * Only meaningful when `a` is an actual array object; applied to a pointer
 * it yields sizeof(pointer) / sizeof(element) instead of an element count.
 * The argument and the whole expansion are parenthesized so the macro can
 * be used safely inside larger expressions (e.g. `n % ARRAY_SIZE(a)`).
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||||
|
|
|
@ -67,6 +67,11 @@ START_TEST(test_translit_lla_sylable)
|
||||||
}
|
}
|
||||||
END_TEST
|
END_TEST
|
||||||
|
|
||||||
|
START_TEST(test_translit_aum)
|
||||||
|
{
|
||||||
|
test_translit("ॐ औम औमे तौमे ॐ", "aum auma aume taume aum");
|
||||||
|
}
|
||||||
|
|
||||||
START_TEST(test_translit_candrabindu)
|
START_TEST(test_translit_candrabindu)
|
||||||
{
|
{
|
||||||
test_translit("तान्यजत्राँ", "tānyajatrām̐");
|
test_translit("तान्यजत्राँ", "tānyajatrām̐");
|
||||||
|
@ -84,6 +89,7 @@ void register_translit_tests(TCase *test_case)
|
||||||
tcase_add_test(test_case, test_translit_words);
|
tcase_add_test(test_case, test_translit_words);
|
||||||
tcase_add_test(test_case, test_translit_vedic);
|
tcase_add_test(test_case, test_translit_vedic);
|
||||||
tcase_add_test(test_case, test_translit_lla_sylable);
|
tcase_add_test(test_case, test_translit_lla_sylable);
|
||||||
|
tcase_add_test(test_case, test_translit_aum);
|
||||||
tcase_add_test(test_case, test_translit_candrabindu);
|
tcase_add_test(test_case, test_translit_candrabindu);
|
||||||
tcase_add_test(test_case, test_translit_zero_width_joiner);
|
tcase_add_test(test_case, test_translit_zero_width_joiner);
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#define SCHWA_CHARACTER 'a'
|
#define SCHWA_CHARACTER 'a'
|
||||||
#define ZERO_WIDTH_JOINER 0x200d
|
#define ZERO_WIDTH_JOINER 0x200d
|
||||||
#define VIRAMA 0x094d
|
#define VIRAMA 0x094d
|
||||||
|
#define AUM 0x0950
|
||||||
#define CHUNKSIZE 1024
|
#define CHUNKSIZE 1024
|
||||||
|
|
||||||
static struct translit_letter table[] = {
|
static struct translit_letter table[] = {
|
||||||
|
@ -259,6 +260,14 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret)
|
||||||
letter = letter_by_code(0x0933); /* .la */
|
letter = letter_by_code(0x0933); /* .la */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (letter->code == AUM) {
|
||||||
|
/* ‘aum’ is followed by something other than
|
||||||
|
* a whitespace → it is ‘au’ + ‘m…’ */
|
||||||
|
if (!isspace(src[3]) && src[3] != '\0') {
|
||||||
|
letter = letter_by_code(0x0914);
|
||||||
|
}
|
||||||
|
}
|
||||||
encode_consonant:
|
encode_consonant:
|
||||||
/* A consonant or an initial vowel */
|
/* A consonant or an initial vowel */
|
||||||
utf8_pack_char(devanagari + done, letter->code);
|
utf8_pack_char(devanagari + done, letter->code);
|
||||||
|
|
Loading…
Reference in a new issue