add harvard-kyoto.c

This commit is contained in:
Vlasta Vesely 2023-02-13 08:48:26 +01:00
parent 200a3f0ef2
commit 5252ae73e1
6 changed files with 136 additions and 2 deletions

View file

@ -24,10 +24,11 @@ LFLAGS = @COVERAGE_LFLAGS@
TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@ TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@
TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@ TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@
OBJECTS = transliteration.o czech.o hindi.o utf8.o velthuis.o OBJECTS = transliteration.o czech.o hindi.o utf8.o velthuis.o harvard-kyoto.o
TEST_OBJECTS = tests/main.o tests/translit.o tests/czech.o \ TEST_OBJECTS = tests/main.o tests/translit.o tests/czech.o \
tests/hindi.o tests/velthuis.o tests/utf8.o tests/integration.o tests/hindi.o tests/velthuis.o tests/harvard-kyoto.o \
tests/utf8.o tests/integration.o
AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \ AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \
*.status *.cache *.status *.cache

85
harvard-kyoto.c Normal file
View file

@ -0,0 +1,85 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* https://en.wikipedia.org/wiki/Harvard-Kyoto */
#include "compat.h"
#include "harvard-kyoto.h"
#include "utf8.h"
/*
 * One transliteration rule: an input sequence and the text emitted in its
 * place.
 */
struct encoder_tuple {
/* Input sequence; find_tuple() matches it as a prefix of the remaining text. */
const char *from;
/* Replacement copied to the output when `from` matches. */
const char *to;
};
/*
 * Transliteration rules, tried in order by find_tuple().
 *
 * Matching is by prefix and the first hit wins, so a longer sequence must
 * stay ahead of any shorter sequence it starts with ("RR" before "R",
 * ".rr" before ".r", and so on).
 */
static const struct encoder_tuple table[] = {
	/* long vowels and vocalic r/l, upper-case spelling */
	{ .from = "A",   .to = "\u0101" },	/* ā */
	{ .from = "I",   .to = "\u012b" },	/* ī */
	{ .from = "U",   .to = "\u016b" },	/* ū */
	{ .from = "RR",  .to = "\u1e5d" },	/* ṝ */
	{ .from = "R",   .to = "\u1e5b" },	/* ṛ */
	{ .from = "LL",  .to = "\u1e39" },	/* ḹ */
	{ .from = "L",   .to = "\u1e37" },	/* ḷ */

	/* the same letters, doubled / dotted spelling */
	{ .from = "aa",  .to = "\u0101" },	/* ā */
	{ .from = "ii",  .to = "\u012b" },	/* ī */
	{ .from = "uu",  .to = "\u016b" },	/* ū */
	{ .from = ".rr", .to = "\u1e5d" },	/* ṝ */
	{ .from = ".r",  .to = "\u1e5b" },	/* ṛ */
	{ .from = ".ll", .to = "\u1e39" },	/* ḹ */
	{ .from = ".l",  .to = "\u1e37" },	/* ḷ */

	/* anusvara and visarga */
	{ .from = "M",   .to = "\u1e43" },	/* ṃ */
	{ .from = "H",   .to = "\u1e25" },	/* ḥ */

	/* consonants */
	{ .from = "G",   .to = "\u1e45" },	/* ṅ */
	{ .from = "J",   .to = "\u00f1" },	/* ñ */
	{ .from = "T",   .to = "\u1e6d" },	/* ṭ */
	{ .from = "D",   .to = "\u1e0d" },	/* ḍ */
	{ .from = "N",   .to = "\u1e47" },	/* ṇ */
	{ .from = "S",   .to = "\u1e63" },	/* ṣ */
	{ .from = "sh",  .to = "\u015b" },	/* ś */
	{ .from = "z",   .to = "\u015b" },	/* ś */
};
static const struct encoder_tuple *find_tuple(const char *text)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(table); i++) {
if (strncmp(text, table[i].from, strlen(table[i].from)) == 0) {
return &table[i];
}
}
return NULL;
}
/*
 * Transliterate Harvard-Kyoto encoded `text` into IAST.
 *
 * The input is scanned left to right. Wherever a rule from `table` matches
 * (see find_tuple()), its replacement is emitted; the sequence "{}" is
 * dropped from the output, so it can be used to keep two letters from being
 * read as one rule (e.g. "a{}a" stays "aa"); any other byte is copied
 * unchanged.
 *
 * text: NUL-terminated input; must not be NULL.
 * out:  on success receives a newly allocated NUL-terminated string that the
 *       caller must free(); left untouched on failure. Must not be NULL.
 *
 * Returns 0 on success or ENOMEM if the output buffer cannot be allocated.
 */
int encode_harvard_kyoto_to_iast(const char *text, char **out)
{
	const size_t text_len = strlen(text);
	const char *str = text, *end = text + text_len;
	const struct encoder_tuple *tuple;
	char *buf, *dest;
	size_t n;

	/*
	 * Each rule consumes at least one input byte and emits at most three
	 * bytes (the replacements are one- to three-byte UTF-8 sequences), so
	 * 3 * text_len bytes hold the text itself. One more byte is required
	 * for the terminating NUL: without it an input made only of
	 * three-byte expansions (e.g. "R") overruns the buffer, and an empty
	 * input asks for a zero-sized allocation.
	 */
	if (text_len > ((size_t)-1 - 1) / 3)
		return ENOMEM;

	buf = calloc(1, text_len * 3 + 1);
	if (buf == NULL)
		return ENOMEM;

	dest = buf;

	while (str < end) {
		tuple = find_tuple(str);

		if (tuple) {
			n = strlen(tuple->to);
			memcpy(dest, tuple->to, n);
			dest += n;
			str += strlen(tuple->from);
		} else if (strncmp(str, "{}", 2) == 0) {
			/* explicit separator: consume it, emit nothing */
			str += 2;
		} else {
			*dest++ = *str++;
		}
	}

	*dest = '\0';
	*out = buf;

	return 0;
}

8
harvard-kyoto.h Normal file
View file

@ -0,0 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __HARVARD_KYOTO_H
#define __HARVARD_KYOTO_H
/*
 * Transliterate Harvard-Kyoto encoded `text` into IAST.
 *
 * On success returns 0 and stores a newly allocated string in `*out`; the
 * caller releases it with free(). Returns ENOMEM, leaving `*out` untouched,
 * when the output buffer cannot be allocated.
 */
int encode_harvard_kyoto_to_iast(const char *text, char **out);
#endif /* __HARVARD_KYOTO_H */

30
tests/harvard-kyoto.c Normal file
View file

@ -0,0 +1,30 @@
#include "test.h"
#include "harvard-kyoto.h"
#include "../harvard-kyoto.h"
/*
 * Encode `hk` with encode_harvard_kyoto_to_iast() and assert that the call
 * succeeds and yields exactly `iast`.
 */
static void test_harvard_kyoto(const char *hk, const char *iast)
{
char *str;
int ret;
ret = encode_harvard_kyoto_to_iast(hk, &str);
/* check the status first: `str` is only assigned when the encoder returns 0 */
ck_assert_int_eq(0, ret);
ck_assert_str_eq(iast, str);
/* the encoder hands ownership of the result to the caller */
free(str);
}
/*
 * Harvard-Kyoto -> IAST encoding: whole words, the alternative doubled and
 * dotted spellings, the retroflex consonants, and the "{}" separator.
 */
START_TEST(test_harvard_kyoto_encoding)
{
	test_harvard_kyoto("saMskRtam", "saṃskṛtam");
	test_harvard_kyoto("yogaH", "yogaḥ");
	test_harvard_kyoto("zaastraM shaastram", "śāstraṃ śāstram");
	test_harvard_kyoto("liGga yajJa varNa", "liṅga yajña varṇa");
	test_harvard_kyoto("Aaa Iii Uuu RR R L", "āā īī ūū ṝ ṛ ḷ");

	/* retroflex consonants */
	test_harvard_kyoto("aSTau paNDita", "aṣṭau paṇḍita");

	/* long vocalic l and the dotted spellings of vocalic r/l */
	test_harvard_kyoto("LL .r .rr .l .ll", "ḹ ṛ ṝ ḷ ḹ");

	/* "{}" is dropped and keeps "a" + "a" from being read as "aa" */
	test_harvard_kyoto("a{}a a{}i", "aa ai");
}
END_TEST
/* Add the Harvard-Kyoto encoder tests defined in this file to `test_case`. */
void register_harvard_kyoto_encoder_tests(TCase *test_case)
{
tcase_add_test(test_case, test_harvard_kyoto_encoding);
}

8
tests/harvard-kyoto.h Normal file
View file

@ -0,0 +1,8 @@
#ifndef __TEST_HARVARD_KYOTO_H
#define __TEST_HARVARD_KYOTO_H
#include <check.h>
/* Add the Harvard-Kyoto encoder tests to the given Check test case. */
void register_harvard_kyoto_encoder_tests(TCase *test_case);
#endif /* __TEST_HARVARD_KYOTO_H */

View file

@ -3,6 +3,7 @@
#include "czech.h" #include "czech.h"
#include "hindi.h" #include "hindi.h"
#include "velthuis.h" #include "velthuis.h"
#include "harvard-kyoto.h"
#include "utf8.h" #include "utf8.h"
#include "integration.h" #include "integration.h"
@ -18,6 +19,7 @@ static Suite *create_test_suite()
register_transcript_czech_tests(test_case); register_transcript_czech_tests(test_case);
register_transcript_hindi_tests(test_case); register_transcript_hindi_tests(test_case);
register_velthuis_encoder_tests(test_case); register_velthuis_encoder_tests(test_case);
register_harvard_kyoto_encoder_tests(test_case);
register_utf8_tests(test_case); register_utf8_tests(test_case);
register_integration_tests(test_case); register_integration_tests(test_case);