diff --git a/Makefile.in b/Makefile.in
index 56e7fc5..ca9164f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -24,10 +24,11 @@ LFLAGS = @COVERAGE_LFLAGS@
 TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@
 TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@
 
-OBJECTS = transliteration.o czech.o hindi.o utf8.o velthuis.o
+OBJECTS = transliteration.o czech.o hindi.o utf8.o velthuis.o harvard-kyoto.o
 
 TEST_OBJECTS = tests/main.o tests/translit.o tests/czech.o \
-	tests/hindi.o tests/velthuis.o tests/utf8.o tests/integration.o
+	tests/hindi.o tests/velthuis.o tests/harvard-kyoto.o \
+	tests/utf8.o tests/integration.o
 
 AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \
 	*.status *.cache
diff --git a/harvard-kyoto.c b/harvard-kyoto.c
new file mode 100644
index 0000000..0320bb3
--- /dev/null
+++ b/harvard-kyoto.c
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* https://en.wikipedia.org/wiki/Harvard-Kyoto */
+
+#include "compat.h"
+#include "harvard-kyoto.h"
+#include "utf8.h"
+
+/* One transliteration rule: ASCII sequence and its IAST replacement. */
+struct encoder_tuple {
+	const char *from;
+	const char *to;
+};
+
+/*
+ * Rules are tried in order and the first prefix match wins, so a longer
+ * sequence must precede any shorter one it starts with ("RR" before "R").
+ */
+static const struct encoder_tuple table[] = {
+	{"A", "\u0101"},
+	{"I", "\u012b"},
+	{"U", "\u016b"},
+	{"RR", "\u1e5d"},
+	{"R", "\u1e5b"},
+	{"LL", "\u1e39"},
+	{"L", "\u1e37"},
+
+	{"aa", "\u0101"},
+	{"ii", "\u012b"},
+	{"uu", "\u016b"},
+	{".rr", "\u1e5d"},
+	{".r", "\u1e5b"},
+	{".ll", "\u1e39"},
+	{".l", "\u1e37"},
+
+	{"M", "\u1e43"},
+	{"H", "\u1e25"},
+
+	{"G", "\u1e45"},
+	{"J", "\u00f1"},
+	{"T", "\u1e6d"},
+	{"D", "\u1e0d"},
+	{"N", "\u1e47"},
+	{"S", "\u1e63"},
+	{"sh", "\u015b"},
+	{"z", "\u015b"}
+};
+
+/* Return the first rule whose source sequence prefixes text, or NULL. */
+static const struct encoder_tuple *find_tuple(const char *text)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(table); i++) {
+		if (strncmp(text, table[i].from, strlen(table[i].from)) == 0) {
+			return &table[i];
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Convert Harvard-Kyoto text to IAST.
+ *
+ * Sequences listed in the table are replaced, an empty "{}" pair is
+ * dropped, and every other byte is copied through unchanged.
+ *
+ * On success returns 0 and stores a newly allocated, NUL-terminated
+ * string in *out; the caller must free() it.  Returns ENOMEM, leaving
+ * *out untouched, if the allocation fails.
+ */
+int encode_harvard_kyoto_to_iast(const char *text, char **out)
+{
+	const char *str = text, *end = str + strlen(str);
+	const struct encoder_tuple *tuple;
+	char *buf, *dest;
+
+	/*
+	 * A single input byte expands to at most three output bytes (UTF-8);
+	 * the extra byte holds the terminating NUL.
+	 */
+	buf = calloc(1, strlen(text) * 3 + 1);
+	if (buf == NULL)
+		return ENOMEM;
+
+	dest = buf;
+	while (str < end) {
+		tuple = find_tuple(str);
+		if (tuple) {
+			sprintf(dest, "%s", tuple->to);
+			str += strlen(tuple->from);
+			dest += strlen(tuple->to);
+		} else if (strncmp(str, "{}", 2) == 0) {
+			str += 2;
+		} else {
+			sprintf(dest, "%c", *str);
+			str++;
+			dest++;
+		}
+	}
+
+	*out = buf;
+
+	return 0;
+}
diff --git a/harvard-kyoto.h b/harvard-kyoto.h
new file mode 100644
index 0000000..10727fb
--- /dev/null
+++ b/harvard-kyoto.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __HARVARD_KYOTO_H
+#define __HARVARD_KYOTO_H
+
+/*
+ * Convert Harvard-Kyoto text to IAST.  On success returns 0 and stores a
+ * newly allocated string in *out (caller frees); returns ENOMEM on OOM.
+ */
+int encode_harvard_kyoto_to_iast(const char *text, char **out);
+
+#endif /* __HARVARD_KYOTO_H */
diff --git a/tests/harvard-kyoto.c b/tests/harvard-kyoto.c
new file mode 100644
index 0000000..caae9e3
--- /dev/null
+++ b/tests/harvard-kyoto.c
@@ -0,0 +1,34 @@
+#include "test.h"
+#include "harvard-kyoto.h"
+#include "../harvard-kyoto.h"
+
+/* Encode hk and assert that the result equals iast. */
+static void test_harvard_kyoto(const char *hk, const char *iast)
+{
+	char *str;
+	int ret;
+
+	ret = encode_harvard_kyoto_to_iast(hk, &str);
+	ck_assert_int_eq(0, ret);
+	ck_assert_str_eq(iast, str);
+
+	free(str);
+}
+
+START_TEST(test_harvard_kyoto_encoding)
+{
+	test_harvard_kyoto("saMskRtam", "saṃskṛtam");
+	test_harvard_kyoto("yogaH", "yogaḥ");
+	test_harvard_kyoto("zaastraM shaastram", "śāstraṃ śāstram");
+	test_harvard_kyoto("liGga yajJa varNa", "liṅga yajña varṇa");
+	test_harvard_kyoto("Aaa Iii Uuu RR R L", "āā īī ūū ṝ ṛ ḷ");
+	/* every byte expands to three: output needs 3 * len + 1 bytes */
+	test_harvard_kyoto("RHMT", "ṛḥṃṭ");
+	test_harvard_kyoto("", "");
+}
+END_TEST
+
+void register_harvard_kyoto_encoder_tests(TCase *test_case)
+{
+	tcase_add_test(test_case, test_harvard_kyoto_encoding);
+}
diff --git a/tests/harvard-kyoto.h b/tests/harvard-kyoto.h
new file mode 100644
index 0000000..c012724
--- /dev/null
+++ b/tests/harvard-kyoto.h
@@ -0,0 +1,8 @@
+#ifndef __TEST_HARVARD_KYOTO_H
+#define __TEST_HARVARD_KYOTO_H
+
+#include <check.h>
+
+void register_harvard_kyoto_encoder_tests(TCase *test_case);
+
+#endif /* __TEST_HARVARD_KYOTO_H */
diff --git a/tests/main.c b/tests/main.c
index cd94582..acd66a6 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -3,6 +3,7 @@
 #include "czech.h"
 #include "hindi.h"
 #include "velthuis.h"
+#include "harvard-kyoto.h"
 #include "utf8.h"
 #include "integration.h"
 
@@ -18,6 +19,7 @@ static Suite *create_test_suite()
 	register_transcript_czech_tests(test_case);
 	register_transcript_hindi_tests(test_case);
 	register_velthuis_encoder_tests(test_case);
+	register_harvard_kyoto_encoder_tests(test_case);
 	register_utf8_tests(test_case);
 	register_integration_tests(test_case);
 