From b5672070b67193376dbdaf859b18c15b30c4308b Mon Sep 17 00:00:00 2001 From: Vlasta Vesely Date: Sat, 13 Mar 2021 16:08:12 +0100 Subject: [PATCH] merge transliteration.c + iast.c and transcription.c + iast-czech.c --- Makefile.in | 5 +- compat.h | 2 + configure.ac | 2 +- iast-czech.c | 103 ------------------------------ iast-czech.h | 10 --- iast.c | 104 ------------------------------- iast.h | 10 --- transcription.c | 113 +++++++++++++++++++++++++++++---- transliteration.c | 155 ++++++++++++++++++++++++++++++++++++---------- velthuis.c | 2 - 10 files changed, 230 insertions(+), 276 deletions(-) delete mode 100644 iast-czech.c delete mode 100644 iast-czech.h delete mode 100644 iast.c delete mode 100644 iast.h diff --git a/Makefile.in b/Makefile.in index 76c3f2e..603a390 100644 --- a/Makefile.in +++ b/Makefile.in @@ -15,13 +15,12 @@ mandir = @mandir@ USE_GCOV = @USE_GCOV@ CFLAGS = -Wall @CFLAGS@ @COVERAGE_CFLAGS@ -LFLAGS = +LFLAGS = @COVERAGE_LFLAGS@ TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@ TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@ -OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o \ - velthuis.o +OBJECTS = transliteration.o transcription.o utf8.o velthuis.o TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o \ tests/velthuis.o tests/utf8.o tests/integration.o diff --git a/compat.h b/compat.h index 78cf55e..20bfc27 100644 --- a/compat.h +++ b/compat.h @@ -10,6 +10,8 @@ #include #include +#define ARRAY_SIZE(a) sizeof(a) / sizeof(*a) + enum err { EHINDI = 1 }; diff --git a/configure.ac b/configure.ac index c454a9b..488d07d 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ AC_PREREQ([2.69]) AC_INIT([sanskrit-iast], [2.0.0], [vlastavesely@protonmail.ch]) -AC_CONFIG_SRCDIR([iast.c]) +AC_CONFIG_SRCDIR([transliteration.c]) AC_CONFIG_HEADERS([config.h]) PROGNAME=iast diff --git a/iast-czech.c b/iast-czech.c deleted file mode 100644 index 802ebf4..0000000 --- a/iast-czech.c +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include "compat.h" -#include "iast-czech.h" - -static struct translit_letter table[] = { - - /* Special characters */ - {0x0950, SPECIAL, "óm"}, /* aum */ - - /* Vowels */ - {0x0910, VOWEL, "ai"}, /* 01 */ - {0x0914, VOWEL, "au"}, /* 02 */ - {0x0905, VOWEL, "a"}, /* 03 */ - {0x0906, VOWEL, "á"}, /* 04 */ - {0x0907, VOWEL, "i"}, /* 05 */ - {0x0908, VOWEL, "í"}, /* 06 */ - {0x0909, VOWEL, "u"}, /* 07 */ - {0x090a, VOWEL, "ú"}, /* 08 */ - {0x090b, VOWEL, "r"}, /* 09 */ - {0x0960, VOWEL, "r"}, /* 10 */ - {0x090c, VOWEL, "l"}, /* 11 */ - {0x0961, VOWEL, "l"}, /* 12 */ - {0x090f, VOWEL, "é"}, /* 13 */ - {0x0913, VOWEL, "ó"}, /* 14 */ - - /* Consonants */ - {0x0916, CONSONANT, "kh"}, /* 01 */ - {0x0918, CONSONANT, "gh"}, /* 02 */ - {0x091b, CONSONANT, "čh"}, /* 03 */ - {0x091d, CONSONANT, "džh"}, /* 04 */ - {0x091c, CONSONANT, "dž"}, /* 05 */ - {0x0920, CONSONANT, "th"}, /* 06 */ - {0x0922, CONSONANT, "dh"}, /* 07 */ - {0x0925, CONSONANT, "th"}, /* 08 */ - {0x0927, CONSONANT, "dh"}, /* 09 */ - {0x092b, CONSONANT, "ph"}, /* 10 */ - {0x092d, CONSONANT, "bh"}, /* 11 */ - {0x0915, CONSONANT, "k"}, /* 12 */ - {0x0917, CONSONANT, "g"}, /* 13 */ - {0x0919, CONSONANT, "n"}, /* 14 */ - {0x0939, CONSONANT, "h"}, /* 15 */ - {0x091a, CONSONANT, "č"}, /* 16 */ - {0x091e, CONSONANT, "ň"}, /* 17 */ - {0x092f, CONSONANT, "j"}, /* 18 */ - {0x0936, CONSONANT, "š"}, /* 19 */ - {0x091F, CONSONANT, "t"}, /* 20 */ - {0x0921, CONSONANT, "d"}, /* 21 */ - {0x0923, CONSONANT, "n"}, /* 22 */ - {0x0930, CONSONANT, "r"}, /* 23 */ - {0x0937, CONSONANT, "š"}, /* 24 */ - {0x0924, CONSONANT, "t"}, /* 25 */ - {0x0926, CONSONANT, "d"}, /* 26 */ - {0x0928, CONSONANT, "n"}, /* 27 */ - {0x0932, CONSONANT, "l"}, /* 28 */ - {0x0938, CONSONANT, "s"}, /* 29 */ - {0x092a, CONSONANT, "p"}, /* 30 */ - {0x092c, CONSONANT, "b"}, /* 31 */ - {0x092e, CONSONANT, "m"}, /* 32 */ - {0x0935, CONSONANT, "v"}, /* 33 */ - {0x0933, CONSONANT, "l"}, - - /* Codas */ - {0x0902, CODA, "m"}, /* anusvara */ - {0x0903, CODA, ""}, /* visarga */ - {0x093d, CODA, "'"}, /* avagrada */ - {0x0901, CODA, "m"}, /* candrabindu */ - - /* Numbers */ - {0x0966, NUMBER, "0"}, - {0x0967, NUMBER, "1"}, - {0x0968, NUMBER, "2"}, - {0x0969, NUMBER, "3"}, - {0x096a, NUMBER, "4"}, - {0x096b, NUMBER, "5"}, - {0x096c, NUMBER, "6"}, - {0x096d, NUMBER, "7"}, - {0x096e, NUMBER, "8"}, - {0x096f, NUMBER, "9"}, - - /* Diacritic modifiers */ - {0x0948, VOWEL_SIGN, "ai"}, - {0x094c, VOWEL_SIGN, "au"}, - {0x093e, VOWEL_SIGN, "á"}, - {0x093f, VOWEL_SIGN, "i"}, - {0x0940, VOWEL_SIGN, "í"}, - {0x0941, VOWEL_SIGN, "u"}, - {0x0942, VOWEL_SIGN, "ú"}, - {0x0943, VOWEL_SIGN, "r"}, - {0x0944, VOWEL_SIGN, "r"}, - {0x0962, VOWEL_SIGN, "l"}, - {0x0963, VOWEL_SIGN, "l"}, - {0x0947, VOWEL_SIGN, "é"}, - {0x094b, VOWEL_SIGN, "ó"}, - {0x094d, VOWEL_SIGN, ""}, /* virama */ - - {0, 0, NULL} -}; - -struct translit_letter *get_iast_czech_transliteration_table() -{ - return table; -} diff --git a/iast-czech.h b/iast-czech.h deleted file mode 100644 index 8bc3f96..0000000 --- a/iast-czech.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __IAST_CZECH_H -#define __IAST_CZECH_H - -#include "transliteration.h" - -struct translit_letter *get_iast_czech_transliteration_table(); - -#endif /* __IAST_CZECH_H */ diff --git a/iast.c b/iast.c deleted file mode 100644 index 91ead57..0000000 --- a/iast.c +++ /dev/null @@ -1,104 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* https://en.wikipedia.org/wiki/IAST */ - -#include "compat.h" -#include "iast.h" - -static struct translit_letter table[] = { - - /* Special characters */ - {0x0950, SPECIAL, "aum"}, /* aum */ - - /* Vowels */ - {0x0910, VOWEL, "ai"}, /* 01 */ - {0x0914, VOWEL, "au"}, /* 02 */ - {0x0905, VOWEL, "a"}, /* 03 */ - {0x0906, VOWEL, "\u0101"}, /* 04 (aa) */ - {0x0907, VOWEL, "i"}, /* 05 */ - {0x0908, VOWEL, "\u012b"}, /* 06 (ii) */ - {0x0909, VOWEL, "u"}, /* 07 */ - {0x090a, VOWEL, "\u016b"}, /* 08 (uu) */ - {0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */ - {0x0960, VOWEL, "\u1e5d"}, /* 10 (.rr) */ - {0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */ - {0x0961, VOWEL, "\u1e39"}, /* 12 (.ll) */ - {0x090f, VOWEL, "e"}, /* 13 */ - {0x0913, VOWEL, "o"}, /* 14 */ - - /* Consonants */ - {0x0916, CONSONANT, "kh"}, /* 01 */ - {0x0918, CONSONANT, "gh"}, /* 02 */ - {0x091b, CONSONANT, "ch"}, /* 03 */ - {0x091d, CONSONANT, "jh"}, /* 04 */ - {0x0920, CONSONANT, "\u1e6dh"}, /* 05 (.th) */ - {0x0922, CONSONANT, "\u1e0dh"}, /* 06 (.dh) */ - {0x0925, CONSONANT, "th"}, /* 07 */ - {0x0927, CONSONANT, "dh"}, /* 08 */ - {0x092b, CONSONANT, "ph"}, /* 09 */ - {0x092d, CONSONANT, "bh"}, /* 10 */ - {0x0915, CONSONANT, "k"}, /* 11 */ - {0x0917, CONSONANT, "g"}, /* 12 */ - {0x0919, CONSONANT, "\u1e45"}, /* 13 ("n) */ - {0x0939, CONSONANT, "h"}, /* 14 */ - {0x091a, CONSONANT, "c"}, /* 15 */ - {0x091c, CONSONANT, "j"}, /* 16 */ - {0x091e, CONSONANT, "\u00f1"}, /* 17 (~n) */ - {0x092f, CONSONANT, "y"}, /* 18 */ - {0x0936, CONSONANT, "\u015b"}, /* 19 ("s) */ - {0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */ - {0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */ - {0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */ - {0x0930, CONSONANT, "r"}, /* 23 */ - {0x0937, CONSONANT, "\u1e63"}, /* 24 (.s) */ - {0x0924, CONSONANT, "t"}, /* 25 */ - {0x0926, CONSONANT, "d"}, /* 26 */ - {0x0928, CONSONANT, "n"}, /* 27 */ - {0x0932, CONSONANT, "l"}, /* 28 */ - {0x0938, CONSONANT, "s"}, /* 29 */ - {0x092a, CONSONANT, "p"}, /* 30 */ - {0x092c, CONSONANT, "b"}, /* 31 */ - {0x092e, CONSONANT, "m"}, /* 32 */ - {0x0935, CONSONANT, "v"}, /* 33 */ - {0x0933, CONSONANT, "\u1e37"}, /* (.l) */ - - /* Codas */ - {0x0902, CODA, "\u1e43"}, /* anusvara (.m) */ - {0x0903, CODA, "\u1e25"}, /* visarga (.h) */ - {0x093d, CODA, "'"}, /* avagrada (') */ - {0x0901, CODA, "m\u0310"}, /* candrabindu */ - - /* Numbers */ - {0x0966, NUMBER, "0"}, - {0x0967, NUMBER, "1"}, - {0x0968, NUMBER, "2"}, - {0x0969, NUMBER, "3"}, - {0x096a, NUMBER, "4"}, - {0x096b, NUMBER, "5"}, - {0x096c, NUMBER, "6"}, - {0x096d, NUMBER, "7"}, - {0x096e, NUMBER, "8"}, - {0x096f, NUMBER, "9"}, - - /* Diacritic modifiers */ - {0x0948, VOWEL_SIGN, "ai"}, - {0x094c, VOWEL_SIGN, "au"}, - {0x093e, VOWEL_SIGN, "\u0101"}, /* (aa) */ - {0x093f, VOWEL_SIGN, "i"}, - {0x0940, VOWEL_SIGN, "\u012b"}, /* (ii) */ - {0x0941, VOWEL_SIGN, "u"}, - {0x0942, VOWEL_SIGN, "\u016b"}, /* (uu) */ - {0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */ - {0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.rr) */ - {0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */ - {0x0963, VOWEL_SIGN, "\u1e39"}, /* (.ll) */ - {0x0947, VOWEL_SIGN, "e"}, - {0x094b, VOWEL_SIGN, "o"}, - {0x094d, VOWEL_SIGN, ""}, /* virama */ - - {0, 0, NULL} -}; - -struct translit_letter *get_iast_transliteration_table() -{ - return table; -} diff --git a/iast.h b/iast.h deleted file mode 100644 index 34e6e4e..0000000 --- a/iast.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __IAST_H -#define __IAST_H - -#include "transliteration.h" - -struct translit_letter *get_iast_transliteration_table(); - -#endif /* __IAST_H */ diff --git a/transcription.c b/transcription.c index 8af00d5..104f9a9 100644 --- a/transcription.c +++ b/transcription.c @@ -3,7 +3,7 @@ #include "compat.h" #include "transcription.h" -#include "iast-czech.h" +#include "transliteration.h" #include "utf8.h" #define SCHWA_CHARACTER 'a' @@ -11,6 +11,98 @@ #define NUKTA 0x093c #define CHUNKSIZE 1024 +static struct translit_letter table[] = { + + /* Special characters */ + {0x0950, SPECIAL, "óm"}, /* aum */ + + /* Vowels */ + {0x0910, VOWEL, "ai"}, /* 01 */ + {0x0914, VOWEL, "au"}, /* 02 */ + {0x0905, VOWEL, "a"}, /* 03 */ + {0x0906, VOWEL, "á"}, /* 04 */ + {0x0907, VOWEL, "i"}, /* 05 */ + {0x0908, VOWEL, "í"}, /* 06 */ + {0x0909, VOWEL, "u"}, /* 07 */ + {0x090a, VOWEL, "ú"}, /* 08 */ + {0x090b, VOWEL, "r"}, /* 09 */ + {0x0960, VOWEL, "r"}, /* 10 */ + {0x090c, VOWEL, "l"}, /* 11 */ + {0x0961, VOWEL, "l"}, /* 12 */ + {0x090f, VOWEL, "é"}, /* 13 */ + {0x0913, VOWEL, "ó"}, /* 14 */ + + /* Consonants */ + {0x0916, CONSONANT, "kh"}, /* 01 */ + {0x0918, CONSONANT, "gh"}, /* 02 */ + {0x091b, CONSONANT, "čh"}, /* 03 */ + {0x091d, CONSONANT, "džh"}, /* 04 */ + {0x091c, CONSONANT, "dž"}, /* 05 */ + {0x0920, CONSONANT, "th"}, /* 06 */ + {0x0922, CONSONANT, "dh"}, /* 07 */ + {0x0925, CONSONANT, "th"}, /* 08 */ + {0x0927, CONSONANT, "dh"}, /* 09 */ + {0x092b, CONSONANT, "ph"}, /* 10 */ + {0x092d, CONSONANT, "bh"}, /* 11 */ + {0x0915, CONSONANT, "k"}, /* 12 */ + {0x0917, CONSONANT, "g"}, /* 13 */ + {0x0919, CONSONANT, "n"}, /* 14 */ + {0x0939, CONSONANT, "h"}, /* 15 */ + {0x091a, CONSONANT, "č"}, /* 16 */ + {0x091e, CONSONANT, "ň"}, /* 17 */ + {0x092f, CONSONANT, "j"}, /* 18 */ + {0x0936, CONSONANT, "š"}, /* 19 */ + {0x091F, CONSONANT, "t"}, /* 20 */ + {0x0921, CONSONANT, "d"}, /* 21 */ + {0x0923, CONSONANT, "n"}, /* 22 */ + {0x0930, CONSONANT, "r"}, /* 23 */ + {0x0937, CONSONANT, "š"}, /* 24 */ + {0x0924, CONSONANT, "t"}, /* 25 */ + {0x0926, CONSONANT, "d"}, /* 26 */ + {0x0928, CONSONANT, "n"}, /* 27 */ + {0x0932, CONSONANT, "l"}, /* 28 */ + {0x0938, CONSONANT, "s"}, /* 29 */ + {0x092a, CONSONANT, "p"}, /* 30 */ + {0x092c, CONSONANT, "b"}, /* 31 */ + {0x092e, CONSONANT, "m"}, /* 32 */ + {0x0935, CONSONANT, "v"}, /* 33 */ + {0x0933, CONSONANT, "l"}, + + /* Codas */ + {0x0902, CODA, "m"}, /* anusvara */ + {0x0903, CODA, ""}, /* visarga */ + {0x093d, CODA, "'"}, /* avagrada */ + {0x0901, CODA, "m"}, /* candrabindu */ + + /* Numbers */ + {0x0966, NUMBER, "0"}, + {0x0967, NUMBER, "1"}, + {0x0968, NUMBER, "2"}, + {0x0969, NUMBER, "3"}, + {0x096a, NUMBER, "4"}, + {0x096b, NUMBER, "5"}, + {0x096c, NUMBER, "6"}, + {0x096d, NUMBER, "7"}, + {0x096e, NUMBER, "8"}, + {0x096f, NUMBER, "9"}, + + /* Diacritic modifiers */ + {0x0948, VOWEL_SIGN, "ai"}, + {0x094c, VOWEL_SIGN, "au"}, + {0x093e, VOWEL_SIGN, "á"}, + {0x093f, VOWEL_SIGN, "i"}, + {0x0940, VOWEL_SIGN, "í"}, + {0x0941, VOWEL_SIGN, "u"}, + {0x0942, VOWEL_SIGN, "ú"}, + {0x0943, VOWEL_SIGN, "r"}, + {0x0944, VOWEL_SIGN, "r"}, + {0x0962, VOWEL_SIGN, "l"}, + {0x0963, VOWEL_SIGN, "l"}, + {0x0947, VOWEL_SIGN, "é"}, + {0x094b, VOWEL_SIGN, "ó"}, + {0x094d, VOWEL_SIGN, ""}, /* virama */ +}; + static inline int is_consonant(unsigned int c) { return (c >= 0x0915 && c <= 0x0939); @@ -67,13 +159,14 @@ static void end_of_word_filter(char *latin, unsigned int *pos, } } -static struct translit_letter *letter_by_code(struct translit_letter *table, - unsigned int c) +static struct translit_letter *letter_by_code(unsigned int c) { - while (table->code != 0) { - if (table->code == c) - return table; - table++; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(table); i++) { + if (table[i].code == c) { + return table + i; + } } return NULL; @@ -81,13 +174,11 @@ static struct translit_letter *letter_by_code(struct translit_letter *table, int transcript_devanagari_to_czech(const char *devanagari, char **ret) { - struct translit_letter *table, *letter; + struct translit_letter *letter; unsigned int c, prev = 0, alloc = 0, done = 0, len; const char *src = devanagari; char *latin = NULL; - table = get_iast_czech_transliteration_table(); - while (1) { if (alloc < done + UNICODE_MAX_LENGTH) { latin = realloc(latin, alloc + CHUNKSIZE); @@ -105,7 +196,7 @@ int transcript_devanagari_to_czech(const char *devanagari, char **ret) return EHINDI; } - letter = letter_by_code(table, c); + letter = letter_by_code(c); if (letter) { switch (letter->type) { case CONSONANT: diff --git a/transliteration.c b/transliteration.c index 7dda25e..c1f7986 100644 --- a/transliteration.c +++ b/transliteration.c @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* https://en.wikipedia.org/wiki/IAST */ #include "compat.h" #include "transliteration.h" -#include "iast.h" #include "utf8.h" #define SCHWA_CHARACTER 'a' @@ -10,13 +10,106 @@ #define NUKTA 0x093c #define CHUNKSIZE 1024 -static struct translit_letter *letter_by_code(struct translit_letter *table, - unsigned int c) +static struct translit_letter table[] = { + + /* Special characters */ + {0x0950, SPECIAL, "aum"}, /* aum */ + + /* Vowels */ + {0x0910, VOWEL, "ai"}, /* 01 */ + {0x0914, VOWEL, "au"}, /* 02 */ + {0x0905, VOWEL, "a"}, /* 03 */ + {0x0906, VOWEL, "\u0101"}, /* 04 (aa) */ + {0x0907, VOWEL, "i"}, /* 05 */ + {0x0908, VOWEL, "\u012b"}, /* 06 (ii) */ + {0x0909, VOWEL, "u"}, /* 07 */ + {0x090a, VOWEL, "\u016b"}, /* 08 (uu) */ + {0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */ + {0x0960, VOWEL, "\u1e5d"}, /* 10 (.rr) */ + {0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */ + {0x0961, VOWEL, "\u1e39"}, /* 12 (.ll) */ + {0x090f, VOWEL, "e"}, /* 13 */ + {0x0913, VOWEL, "o"}, /* 14 */ + + /* Consonants */ + {0x0916, CONSONANT, "kh"}, /* 01 */ + {0x0918, CONSONANT, "gh"}, /* 02 */ + {0x091b, CONSONANT, "ch"}, /* 03 */ + {0x091d, CONSONANT, "jh"}, /* 04 */ + {0x0920, CONSONANT, "\u1e6dh"}, /* 05 (.th) */ + {0x0922, CONSONANT, "\u1e0dh"}, /* 06 (.dh) */ + {0x0925, CONSONANT, "th"}, /* 07 */ + {0x0927, CONSONANT, "dh"}, /* 08 */ + {0x092b, CONSONANT, "ph"}, /* 09 */ + {0x092d, CONSONANT, "bh"}, /* 10 */ + {0x0915, CONSONANT, "k"}, /* 11 */ + {0x0917, CONSONANT, "g"}, /* 12 */ + {0x0919, CONSONANT, "\u1e45"}, /* 13 ("n) */ + {0x0939, CONSONANT, "h"}, /* 14 */ + {0x091a, CONSONANT, "c"}, /* 15 */ + {0x091c, CONSONANT, "j"}, /* 16 */ + {0x091e, CONSONANT, "\u00f1"}, /* 17 (~n) */ + {0x092f, CONSONANT, "y"}, /* 18 */ + {0x0936, CONSONANT, "\u015b"}, /* 19 ("s) */ + {0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */ + {0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */ + {0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */ + {0x0930, CONSONANT, "r"}, /* 23 */ + {0x0937, CONSONANT, "\u1e63"}, /* 24 (.s) */ + {0x0924, CONSONANT, "t"}, /* 25 */ + {0x0926, CONSONANT, "d"}, /* 26 */ + {0x0928, CONSONANT, "n"}, /* 27 */ + {0x0932, CONSONANT, "l"}, /* 28 */ + {0x0938, CONSONANT, "s"}, /* 29 */ + {0x092a, CONSONANT, "p"}, /* 30 */ + {0x092c, CONSONANT, "b"}, /* 31 */ + {0x092e, CONSONANT, "m"}, /* 32 */ + {0x0935, CONSONANT, "v"}, /* 33 */ + {0x0933, CONSONANT, "\u1e37"}, /* (.l) */ + + /* Codas */ + {0x0902, CODA, "\u1e43"}, /* anusvara (.m) */ + {0x0903, CODA, "\u1e25"}, /* visarga (.h) */ + {0x093d, CODA, "'"}, /* avagrada (') */ + {0x0901, CODA, "m\u0310"}, /* candrabindu */ + + /* Numbers */ + {0x0966, NUMBER, "0"}, + {0x0967, NUMBER, "1"}, + {0x0968, NUMBER, "2"}, + {0x0969, NUMBER, "3"}, + {0x096a, NUMBER, "4"}, + {0x096b, NUMBER, "5"}, + {0x096c, NUMBER, "6"}, + {0x096d, NUMBER, "7"}, + {0x096e, NUMBER, "8"}, + {0x096f, NUMBER, "9"}, + + /* Diacritic modifiers */ + {0x0948, VOWEL_SIGN, "ai"}, + {0x094c, VOWEL_SIGN, "au"}, + {0x093e, VOWEL_SIGN, "\u0101"}, /* (aa) */ + {0x093f, VOWEL_SIGN, "i"}, + {0x0940, VOWEL_SIGN, "\u012b"}, /* (ii) */ + {0x0941, VOWEL_SIGN, "u"}, + {0x0942, VOWEL_SIGN, "\u016b"}, /* (uu) */ + {0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */ + {0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.rr) */ + {0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */ + {0x0963, VOWEL_SIGN, "\u1e39"}, /* (.ll) */ + {0x0947, VOWEL_SIGN, "e"}, + {0x094b, VOWEL_SIGN, "o"}, + {0x094d, VOWEL_SIGN, ""}, /* virama */ +}; + +static struct translit_letter *letter_by_code(unsigned int c) { - while (table->code != 0) { - if (table->code == c) - return table; - table++; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(table); i++) { + if (table[i].code == c) { + return table + i; + } } return NULL; @@ -24,13 +117,11 @@ static struct translit_letter *letter_by_code(struct translit_letter *table, int transliterate_devanagari_to_latin(const char *devanagari, char **ret) { - struct translit_letter *table, *letter; + struct translit_letter *letter; unsigned int c, alloc = 0, done = 0, len; const char *src = devanagari; char *latin = NULL; - table = get_iast_transliteration_table(); - while (1) { if (alloc < done + UNICODE_MAX_LENGTH) { latin = realloc(latin, alloc + CHUNKSIZE); @@ -46,7 +137,7 @@ int transliterate_devanagari_to_latin(const char *devanagari, char **ret) return EHINDI; } - letter = letter_by_code(table, c); + letter = letter_by_code(c); if (letter) { switch (letter->type) { case CONSONANT: @@ -78,28 +169,30 @@ int transliterate_devanagari_to_latin(const char *devanagari, char **ret) return 0; } -static struct translit_letter *letter_by_data(struct translit_letter *table, - const char *data) +static struct translit_letter *letter_by_data(const char *data) { - while (table->code != 0) { - unsigned int len = strlen(table->data); - if (len && strncmp(table->data, data, len) == 0) - return table; - table++; + unsigned int i, len; + + for (i = 0; i < ARRAY_SIZE(table); i++) { + len = strlen(table[i].data); + if (len && strncmp(table[i].data, data, len) == 0) { + return table + i; + } } return NULL; } -static struct translit_letter *vowel_sign_by_data(struct translit_letter *table, - const char *data) +static struct translit_letter *vowel_sign_by_data(const char *data) { - while (table->code != 0) { - unsigned int len = strlen(table->data); - if (len && strncmp(table->data, data, len) == 0 && - table->type == VOWEL_SIGN) - return table; - table++; + unsigned int i, len; + + for (i = 0; i < ARRAY_SIZE(table); i++) { + len = strlen(table[i].data); + if (len && strncmp(table[i].data, data, len) == 0 && + table[i].type == VOWEL_SIGN) { + return table + i; + } } return NULL; @@ -107,13 +200,11 @@ static struct translit_letter *vowel_sign_by_data(struct translit_letter *table, int transliterate_latin_to_devanagari(const char *latin, char **ret) { - struct translit_letter *table, *letter, *next; + struct translit_letter *letter, *next; unsigned int alloc = 0, done = 0, len; const char *src = latin; char *devanagari = NULL; - table = get_iast_transliteration_table(); - while (*src) { if (alloc < done + UNICODE_MAX_LENGTH) { devanagari = realloc(devanagari, alloc + CHUNKSIZE); @@ -122,7 +213,7 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret) /* consonant (.l) */ if (strncmp(src, "\u1e37", 3) == 0) { - letter = letter_by_data(table, src + 3); + letter = letter_by_data(src + 3); if (letter && letter->type == VOWEL) { utf8_pack_char(devanagari + done, 0x0933); done += 3; @@ -139,7 +230,7 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret) continue; } - letter = letter_by_data(table, src); + letter = letter_by_data(src); if (letter) { utf8_pack_char(devanagari + done, letter->code); len = utf8_char_length(letter->code); @@ -149,7 +240,7 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret) if (letter->type == VOWEL || letter->type == CODA) continue; encode_vowel_modifier: - next = vowel_sign_by_data(table, src); + next = vowel_sign_by_data(src); if (next) { utf8_pack_char(devanagari + done, next->code); done += utf8_char_length(next->code); diff --git a/velthuis.c b/velthuis.c index 5607d37..7a129c6 100644 --- a/velthuis.c +++ b/velthuis.c @@ -5,8 +5,6 @@ #include "velthuis.h" #include "utf8.h" -#define ARRAY_SIZE(a) sizeof(a) / sizeof(*a) - struct encoder_tuple { const char *from; const char *to;