From 93114de4a7b0b5fd11bc635f102c2284bd8a535b Mon Sep 17 00:00:00 2001
From: Vlasta Vesely
Date: Fri, 18 May 2018 10:52:11 +0200
Subject: [PATCH] add punctation encoder

---
 Makefile  |   2 +-
 encoder.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 encoder.h |   8 ++++
 3 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 encoder.c
 create mode 100644 encoder.h

diff --git a/Makefile b/Makefile
index 47166e1..3d87939 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 .PHONY: main test install uninstall clean
 
-OBJS = syllable.o utf8.o transliteration.o iast.o iast-czech.o
+OBJS = syllable.o utf8.o transliteration.o iast.o iast-czech.o encoder.o
 
 iast: main.o $(OBJS)
 
diff --git a/encoder.c b/encoder.c
new file mode 100644
index 0000000..6110827
--- /dev/null
+++ b/encoder.c
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "encoder.h"
+#include "utf8.h"
+
+/* One ASCII punctuation sequence and the UTF-8 IAST letter it encodes. */
+struct encoder_tuple {
+	const char *from;
+	const char *to;
+};
+
+/*
+ * ASCII punctuation notation -> IAST letter, terminated by {NULL, NULL}.
+ * Entry order does not matter: find_tuple() picks the longest match.
+ * NOTE(review): "l.-" has no short "l." counterpart - confirm intended.
+ */
+static const struct encoder_tuple table[] = {
+	{"a-", "ā"},
+	{"i-", "ī"},
+	{"u-", "ū"},
+	{"r.", "ṛ"},
+	{"r.-", "ṝ"},
+	{"l.-", "ḹ"},
+	{"n^.", "ṅ"},
+	{"n~", "ñ"},
+	{"s,", "ś"},
+	{"t.", "ṭ"},
+	{"d.", "ḍ"},
+	{"n.", "ṇ"},
+	{"s.", "ṣ"},
+	{"m.", "ṃ"},
+	{"h.", "ḥ"},
+	{NULL, NULL}
+};
+
+/*
+ * Return the table entry whose `from` is the longest prefix of `text`,
+ * or NULL when no entry matches. Taking the longest match keeps "r.-"
+ * from being consumed as "r." followed by a stray "-".
+ */
+static const struct encoder_tuple *find_tuple(const char *text)
+{
+	const struct encoder_tuple *best = NULL;
+	const struct encoder_tuple *walk;
+	size_t best_len = 0;
+	size_t len;
+
+	for (walk = table; walk->from != NULL; walk++) {
+		len = strlen(walk->from);
+		if (len > best_len && strncmp(text, walk->from, len) == 0) {
+			best = walk;
+			best_len = len;
+		}
+	}
+
+	return best;
+}
+
+/*
+ * Encode ASCII punctuation notation (e.g. "a-", "s,", "n^.") as UTF-8 IAST.
+ * Bytes that do not start a known sequence are copied unchanged.
+ *
+ * Returns a newly allocated NUL-terminated string that the caller must
+ * free(), or NULL if `text` is NULL or the allocation fails.
+ */
+char *encode_iast_punctation(const char *text)
+{
+	const struct encoder_tuple *tuple;
+	const char *str;
+	char *buf, *dest;
+	size_t size = 1;	/* room for the terminating NUL */
+	size_t len;
+
+	if (text == NULL) {
+		return NULL;
+	}
+
+	/* First pass: measure the output so the buffer cannot overflow. */
+	for (str = text; *str != '\0';) {
+		tuple = find_tuple(str);
+		if (tuple) {
+			size += strlen(tuple->to);
+			str += strlen(tuple->from);
+		} else {
+			size++;
+			str++;
+		}
+	}
+
+	buf = malloc(size);
+	if (buf == NULL) {
+		return NULL;
+	}
+
+	/* Second pass: emit the encoded text. */
+	dest = buf;
+	for (str = text; *str != '\0';) {
+		tuple = find_tuple(str);
+		if (tuple) {
+			len = strlen(tuple->to);
+			memcpy(dest, tuple->to, len);
+			dest += len;
+			str += strlen(tuple->from);
+		} else {
+			*dest++ = *str++;
+		}
+	}
+	*dest = '\0';
+
+	return buf;
+}
diff --git a/encoder.h b/encoder.h
new file mode 100644
index 0000000..bd02fb0
--- /dev/null
+++ b/encoder.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ENCODER_H
+#define __ENCODER_H
+
+char *encode_iast_punctation(const char *text);
+
+#endif /* __ENCODER_H */