merge transliteration.c + iast.c and transcription.c + iast-czech.c
This commit is contained in:
parent
171f0fe634
commit
b5672070b6
10 changed files with 230 additions and 276 deletions
|
@ -15,13 +15,12 @@ mandir = @mandir@
|
||||||
USE_GCOV = @USE_GCOV@
|
USE_GCOV = @USE_GCOV@
|
||||||
|
|
||||||
CFLAGS = -Wall @CFLAGS@ @COVERAGE_CFLAGS@
|
CFLAGS = -Wall @CFLAGS@ @COVERAGE_CFLAGS@
|
||||||
LFLAGS =
|
LFLAGS = @COVERAGE_LFLAGS@
|
||||||
|
|
||||||
TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@
|
TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@
|
||||||
TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@
|
TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@
|
||||||
|
|
||||||
OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o \
|
OBJECTS = transliteration.o transcription.o utf8.o velthuis.o
|
||||||
velthuis.o
|
|
||||||
|
|
||||||
TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o \
|
TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o \
|
||||||
tests/velthuis.o tests/utf8.o tests/integration.o
|
tests/velthuis.o tests/utf8.o tests/integration.o
|
||||||
|
|
2
compat.h
2
compat.h
|
@ -10,6 +10,8 @@
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
|
/*
 * Number of elements in a true array (not a pointer or array parameter).
 * The whole expansion and the argument are parenthesized so the macro
 * composes safely inside larger expressions, e.g. `x / ARRAY_SIZE(a)`.
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||||
|
|
||||||
enum err {
|
enum err {
|
||||||
EHINDI = 1
|
EHINDI = 1
|
||||||
};
|
};
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
AC_PREREQ([2.69])
|
AC_PREREQ([2.69])
|
||||||
AC_INIT([sanskrit-iast], [2.0.0], [vlastavesely@protonmail.ch])
|
AC_INIT([sanskrit-iast], [2.0.0], [vlastavesely@protonmail.ch])
|
||||||
AC_CONFIG_SRCDIR([iast.c])
|
AC_CONFIG_SRCDIR([transliteration.c])
|
||||||
AC_CONFIG_HEADERS([config.h])
|
AC_CONFIG_HEADERS([config.h])
|
||||||
|
|
||||||
PROGNAME=iast
|
PROGNAME=iast
|
||||||
|
|
103
iast-czech.c
103
iast-czech.c
|
@ -1,103 +0,0 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
|
|
||||||
#include "compat.h"
|
|
||||||
#include "iast-czech.h"
|
|
||||||
|
|
||||||
static struct translit_letter table[] = {
|
|
||||||
|
|
||||||
/* Special characters */
|
|
||||||
{0x0950, SPECIAL, "óm"}, /* aum */
|
|
||||||
|
|
||||||
/* Vowels */
|
|
||||||
{0x0910, VOWEL, "ai"}, /* 01 */
|
|
||||||
{0x0914, VOWEL, "au"}, /* 02 */
|
|
||||||
{0x0905, VOWEL, "a"}, /* 03 */
|
|
||||||
{0x0906, VOWEL, "á"}, /* 04 */
|
|
||||||
{0x0907, VOWEL, "i"}, /* 05 */
|
|
||||||
{0x0908, VOWEL, "í"}, /* 06 */
|
|
||||||
{0x0909, VOWEL, "u"}, /* 07 */
|
|
||||||
{0x090a, VOWEL, "ú"}, /* 08 */
|
|
||||||
{0x090b, VOWEL, "r"}, /* 09 */
|
|
||||||
{0x0960, VOWEL, "r"}, /* 10 */
|
|
||||||
{0x090c, VOWEL, "l"}, /* 11 */
|
|
||||||
{0x0961, VOWEL, "l"}, /* 12 */
|
|
||||||
{0x090f, VOWEL, "é"}, /* 13 */
|
|
||||||
{0x0913, VOWEL, "ó"}, /* 14 */
|
|
||||||
|
|
||||||
/* Consonants */
|
|
||||||
{0x0916, CONSONANT, "kh"}, /* 01 */
|
|
||||||
{0x0918, CONSONANT, "gh"}, /* 02 */
|
|
||||||
{0x091b, CONSONANT, "čh"}, /* 03 */
|
|
||||||
{0x091d, CONSONANT, "džh"}, /* 04 */
|
|
||||||
{0x091c, CONSONANT, "dž"}, /* 05 */
|
|
||||||
{0x0920, CONSONANT, "th"}, /* 06 */
|
|
||||||
{0x0922, CONSONANT, "dh"}, /* 07 */
|
|
||||||
{0x0925, CONSONANT, "th"}, /* 08 */
|
|
||||||
{0x0927, CONSONANT, "dh"}, /* 09 */
|
|
||||||
{0x092b, CONSONANT, "ph"}, /* 10 */
|
|
||||||
{0x092d, CONSONANT, "bh"}, /* 11 */
|
|
||||||
{0x0915, CONSONANT, "k"}, /* 12 */
|
|
||||||
{0x0917, CONSONANT, "g"}, /* 13 */
|
|
||||||
{0x0919, CONSONANT, "n"}, /* 14 */
|
|
||||||
{0x0939, CONSONANT, "h"}, /* 15 */
|
|
||||||
{0x091a, CONSONANT, "č"}, /* 16 */
|
|
||||||
{0x091e, CONSONANT, "ň"}, /* 17 */
|
|
||||||
{0x092f, CONSONANT, "j"}, /* 18 */
|
|
||||||
{0x0936, CONSONANT, "š"}, /* 19 */
|
|
||||||
{0x091F, CONSONANT, "t"}, /* 20 */
|
|
||||||
{0x0921, CONSONANT, "d"}, /* 21 */
|
|
||||||
{0x0923, CONSONANT, "n"}, /* 22 */
|
|
||||||
{0x0930, CONSONANT, "r"}, /* 23 */
|
|
||||||
{0x0937, CONSONANT, "š"}, /* 24 */
|
|
||||||
{0x0924, CONSONANT, "t"}, /* 25 */
|
|
||||||
{0x0926, CONSONANT, "d"}, /* 26 */
|
|
||||||
{0x0928, CONSONANT, "n"}, /* 27 */
|
|
||||||
{0x0932, CONSONANT, "l"}, /* 28 */
|
|
||||||
{0x0938, CONSONANT, "s"}, /* 29 */
|
|
||||||
{0x092a, CONSONANT, "p"}, /* 30 */
|
|
||||||
{0x092c, CONSONANT, "b"}, /* 31 */
|
|
||||||
{0x092e, CONSONANT, "m"}, /* 32 */
|
|
||||||
{0x0935, CONSONANT, "v"}, /* 33 */
|
|
||||||
{0x0933, CONSONANT, "l"},
|
|
||||||
|
|
||||||
/* Codas */
|
|
||||||
{0x0902, CODA, "m"}, /* anusvara */
|
|
||||||
{0x0903, CODA, ""}, /* visarga */
|
|
||||||
{0x093d, CODA, "'"}, /* avagrada */
|
|
||||||
{0x0901, CODA, "m"}, /* candrabindu */
|
|
||||||
|
|
||||||
/* Numbers */
|
|
||||||
{0x0966, NUMBER, "0"},
|
|
||||||
{0x0967, NUMBER, "1"},
|
|
||||||
{0x0968, NUMBER, "2"},
|
|
||||||
{0x0969, NUMBER, "3"},
|
|
||||||
{0x096a, NUMBER, "4"},
|
|
||||||
{0x096b, NUMBER, "5"},
|
|
||||||
{0x096c, NUMBER, "6"},
|
|
||||||
{0x096d, NUMBER, "7"},
|
|
||||||
{0x096e, NUMBER, "8"},
|
|
||||||
{0x096f, NUMBER, "9"},
|
|
||||||
|
|
||||||
/* Diacritic modifiers */
|
|
||||||
{0x0948, VOWEL_SIGN, "ai"},
|
|
||||||
{0x094c, VOWEL_SIGN, "au"},
|
|
||||||
{0x093e, VOWEL_SIGN, "á"},
|
|
||||||
{0x093f, VOWEL_SIGN, "i"},
|
|
||||||
{0x0940, VOWEL_SIGN, "í"},
|
|
||||||
{0x0941, VOWEL_SIGN, "u"},
|
|
||||||
{0x0942, VOWEL_SIGN, "ú"},
|
|
||||||
{0x0943, VOWEL_SIGN, "r"},
|
|
||||||
{0x0944, VOWEL_SIGN, "r"},
|
|
||||||
{0x0962, VOWEL_SIGN, "l"},
|
|
||||||
{0x0963, VOWEL_SIGN, "l"},
|
|
||||||
{0x0947, VOWEL_SIGN, "é"},
|
|
||||||
{0x094b, VOWEL_SIGN, "ó"},
|
|
||||||
{0x094d, VOWEL_SIGN, ""}, /* virama */
|
|
||||||
|
|
||||||
{0, 0, NULL}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct translit_letter *get_iast_czech_transliteration_table()
|
|
||||||
{
|
|
||||||
return table;
|
|
||||||
}
|
|
10
iast-czech.h
10
iast-czech.h
|
@ -1,10 +0,0 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
|
|
||||||
#ifndef __IAST_CZECH_H
|
|
||||||
#define __IAST_CZECH_H
|
|
||||||
|
|
||||||
#include "transliteration.h"
|
|
||||||
|
|
||||||
struct translit_letter *get_iast_czech_transliteration_table();
|
|
||||||
|
|
||||||
#endif /* __IAST_CZECH_H */
|
|
104
iast.c
104
iast.c
|
@ -1,104 +0,0 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
/* https://en.wikipedia.org/wiki/IAST */
|
|
||||||
|
|
||||||
#include "compat.h"
|
|
||||||
#include "iast.h"
|
|
||||||
|
|
||||||
static struct translit_letter table[] = {
|
|
||||||
|
|
||||||
/* Special characters */
|
|
||||||
{0x0950, SPECIAL, "aum"}, /* aum */
|
|
||||||
|
|
||||||
/* Vowels */
|
|
||||||
{0x0910, VOWEL, "ai"}, /* 01 */
|
|
||||||
{0x0914, VOWEL, "au"}, /* 02 */
|
|
||||||
{0x0905, VOWEL, "a"}, /* 03 */
|
|
||||||
{0x0906, VOWEL, "\u0101"}, /* 04 (aa) */
|
|
||||||
{0x0907, VOWEL, "i"}, /* 05 */
|
|
||||||
{0x0908, VOWEL, "\u012b"}, /* 06 (ii) */
|
|
||||||
{0x0909, VOWEL, "u"}, /* 07 */
|
|
||||||
{0x090a, VOWEL, "\u016b"}, /* 08 (uu) */
|
|
||||||
{0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */
|
|
||||||
{0x0960, VOWEL, "\u1e5d"}, /* 10 (.rr) */
|
|
||||||
{0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */
|
|
||||||
{0x0961, VOWEL, "\u1e39"}, /* 12 (.ll) */
|
|
||||||
{0x090f, VOWEL, "e"}, /* 13 */
|
|
||||||
{0x0913, VOWEL, "o"}, /* 14 */
|
|
||||||
|
|
||||||
/* Consonants */
|
|
||||||
{0x0916, CONSONANT, "kh"}, /* 01 */
|
|
||||||
{0x0918, CONSONANT, "gh"}, /* 02 */
|
|
||||||
{0x091b, CONSONANT, "ch"}, /* 03 */
|
|
||||||
{0x091d, CONSONANT, "jh"}, /* 04 */
|
|
||||||
{0x0920, CONSONANT, "\u1e6dh"}, /* 05 (.th) */
|
|
||||||
{0x0922, CONSONANT, "\u1e0dh"}, /* 06 (.dh) */
|
|
||||||
{0x0925, CONSONANT, "th"}, /* 07 */
|
|
||||||
{0x0927, CONSONANT, "dh"}, /* 08 */
|
|
||||||
{0x092b, CONSONANT, "ph"}, /* 09 */
|
|
||||||
{0x092d, CONSONANT, "bh"}, /* 10 */
|
|
||||||
{0x0915, CONSONANT, "k"}, /* 11 */
|
|
||||||
{0x0917, CONSONANT, "g"}, /* 12 */
|
|
||||||
{0x0919, CONSONANT, "\u1e45"}, /* 13 ("n) */
|
|
||||||
{0x0939, CONSONANT, "h"}, /* 14 */
|
|
||||||
{0x091a, CONSONANT, "c"}, /* 15 */
|
|
||||||
{0x091c, CONSONANT, "j"}, /* 16 */
|
|
||||||
{0x091e, CONSONANT, "\u00f1"}, /* 17 (~n) */
|
|
||||||
{0x092f, CONSONANT, "y"}, /* 18 */
|
|
||||||
{0x0936, CONSONANT, "\u015b"}, /* 19 ("s) */
|
|
||||||
{0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */
|
|
||||||
{0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */
|
|
||||||
{0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */
|
|
||||||
{0x0930, CONSONANT, "r"}, /* 23 */
|
|
||||||
{0x0937, CONSONANT, "\u1e63"}, /* 24 (.s) */
|
|
||||||
{0x0924, CONSONANT, "t"}, /* 25 */
|
|
||||||
{0x0926, CONSONANT, "d"}, /* 26 */
|
|
||||||
{0x0928, CONSONANT, "n"}, /* 27 */
|
|
||||||
{0x0932, CONSONANT, "l"}, /* 28 */
|
|
||||||
{0x0938, CONSONANT, "s"}, /* 29 */
|
|
||||||
{0x092a, CONSONANT, "p"}, /* 30 */
|
|
||||||
{0x092c, CONSONANT, "b"}, /* 31 */
|
|
||||||
{0x092e, CONSONANT, "m"}, /* 32 */
|
|
||||||
{0x0935, CONSONANT, "v"}, /* 33 */
|
|
||||||
{0x0933, CONSONANT, "\u1e37"}, /* (.l) */
|
|
||||||
|
|
||||||
/* Codas */
|
|
||||||
{0x0902, CODA, "\u1e43"}, /* anusvara (.m) */
|
|
||||||
{0x0903, CODA, "\u1e25"}, /* visarga (.h) */
|
|
||||||
{0x093d, CODA, "'"}, /* avagrada (') */
|
|
||||||
{0x0901, CODA, "m\u0310"}, /* candrabindu */
|
|
||||||
|
|
||||||
/* Numbers */
|
|
||||||
{0x0966, NUMBER, "0"},
|
|
||||||
{0x0967, NUMBER, "1"},
|
|
||||||
{0x0968, NUMBER, "2"},
|
|
||||||
{0x0969, NUMBER, "3"},
|
|
||||||
{0x096a, NUMBER, "4"},
|
|
||||||
{0x096b, NUMBER, "5"},
|
|
||||||
{0x096c, NUMBER, "6"},
|
|
||||||
{0x096d, NUMBER, "7"},
|
|
||||||
{0x096e, NUMBER, "8"},
|
|
||||||
{0x096f, NUMBER, "9"},
|
|
||||||
|
|
||||||
/* Diacritic modifiers */
|
|
||||||
{0x0948, VOWEL_SIGN, "ai"},
|
|
||||||
{0x094c, VOWEL_SIGN, "au"},
|
|
||||||
{0x093e, VOWEL_SIGN, "\u0101"}, /* (aa) */
|
|
||||||
{0x093f, VOWEL_SIGN, "i"},
|
|
||||||
{0x0940, VOWEL_SIGN, "\u012b"}, /* (ii) */
|
|
||||||
{0x0941, VOWEL_SIGN, "u"},
|
|
||||||
{0x0942, VOWEL_SIGN, "\u016b"}, /* (uu) */
|
|
||||||
{0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */
|
|
||||||
{0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.rr) */
|
|
||||||
{0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */
|
|
||||||
{0x0963, VOWEL_SIGN, "\u1e39"}, /* (.ll) */
|
|
||||||
{0x0947, VOWEL_SIGN, "e"},
|
|
||||||
{0x094b, VOWEL_SIGN, "o"},
|
|
||||||
{0x094d, VOWEL_SIGN, ""}, /* virama */
|
|
||||||
|
|
||||||
{0, 0, NULL}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct translit_letter *get_iast_transliteration_table()
|
|
||||||
{
|
|
||||||
return table;
|
|
||||||
}
|
|
10
iast.h
10
iast.h
|
@ -1,10 +0,0 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
|
||||||
|
|
||||||
#ifndef __IAST_H
|
|
||||||
#define __IAST_H
|
|
||||||
|
|
||||||
#include "transliteration.h"
|
|
||||||
|
|
||||||
struct translit_letter *get_iast_transliteration_table();
|
|
||||||
|
|
||||||
#endif /* __IAST_H */
|
|
113
transcription.c
113
transcription.c
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
#include "transcription.h"
|
#include "transcription.h"
|
||||||
#include "iast-czech.h"
|
#include "transliteration.h"
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
|
|
||||||
#define SCHWA_CHARACTER 'a'
|
#define SCHWA_CHARACTER 'a'
|
||||||
|
@ -11,6 +11,98 @@
|
||||||
#define NUKTA 0x093c
|
#define NUKTA 0x093c
|
||||||
#define CHUNKSIZE 1024
|
#define CHUNKSIZE 1024
|
||||||
|
|
||||||
|
static struct translit_letter table[] = {
|
||||||
|
|
||||||
|
/* Special characters */
|
||||||
|
{0x0950, SPECIAL, "óm"}, /* aum */
|
||||||
|
|
||||||
|
/* Vowels */
|
||||||
|
{0x0910, VOWEL, "ai"}, /* 01 */
|
||||||
|
{0x0914, VOWEL, "au"}, /* 02 */
|
||||||
|
{0x0905, VOWEL, "a"}, /* 03 */
|
||||||
|
{0x0906, VOWEL, "á"}, /* 04 */
|
||||||
|
{0x0907, VOWEL, "i"}, /* 05 */
|
||||||
|
{0x0908, VOWEL, "í"}, /* 06 */
|
||||||
|
{0x0909, VOWEL, "u"}, /* 07 */
|
||||||
|
{0x090a, VOWEL, "ú"}, /* 08 */
|
||||||
|
{0x090b, VOWEL, "r"}, /* 09 */
|
||||||
|
{0x0960, VOWEL, "r"}, /* 10 */
|
||||||
|
{0x090c, VOWEL, "l"}, /* 11 */
|
||||||
|
{0x0961, VOWEL, "l"}, /* 12 */
|
||||||
|
{0x090f, VOWEL, "é"}, /* 13 */
|
||||||
|
{0x0913, VOWEL, "ó"}, /* 14 */
|
||||||
|
|
||||||
|
/* Consonants */
|
||||||
|
{0x0916, CONSONANT, "kh"}, /* 01 */
|
||||||
|
{0x0918, CONSONANT, "gh"}, /* 02 */
|
||||||
|
{0x091b, CONSONANT, "čh"}, /* 03 */
|
||||||
|
{0x091d, CONSONANT, "džh"}, /* 04 */
|
||||||
|
{0x091c, CONSONANT, "dž"}, /* 05 */
|
||||||
|
{0x0920, CONSONANT, "th"}, /* 06 */
|
||||||
|
{0x0922, CONSONANT, "dh"}, /* 07 */
|
||||||
|
{0x0925, CONSONANT, "th"}, /* 08 */
|
||||||
|
{0x0927, CONSONANT, "dh"}, /* 09 */
|
||||||
|
{0x092b, CONSONANT, "ph"}, /* 10 */
|
||||||
|
{0x092d, CONSONANT, "bh"}, /* 11 */
|
||||||
|
{0x0915, CONSONANT, "k"}, /* 12 */
|
||||||
|
{0x0917, CONSONANT, "g"}, /* 13 */
|
||||||
|
{0x0919, CONSONANT, "n"}, /* 14 */
|
||||||
|
{0x0939, CONSONANT, "h"}, /* 15 */
|
||||||
|
{0x091a, CONSONANT, "č"}, /* 16 */
|
||||||
|
{0x091e, CONSONANT, "ň"}, /* 17 */
|
||||||
|
{0x092f, CONSONANT, "j"}, /* 18 */
|
||||||
|
{0x0936, CONSONANT, "š"}, /* 19 */
|
||||||
|
{0x091F, CONSONANT, "t"}, /* 20 */
|
||||||
|
{0x0921, CONSONANT, "d"}, /* 21 */
|
||||||
|
{0x0923, CONSONANT, "n"}, /* 22 */
|
||||||
|
{0x0930, CONSONANT, "r"}, /* 23 */
|
||||||
|
{0x0937, CONSONANT, "š"}, /* 24 */
|
||||||
|
{0x0924, CONSONANT, "t"}, /* 25 */
|
||||||
|
{0x0926, CONSONANT, "d"}, /* 26 */
|
||||||
|
{0x0928, CONSONANT, "n"}, /* 27 */
|
||||||
|
{0x0932, CONSONANT, "l"}, /* 28 */
|
||||||
|
{0x0938, CONSONANT, "s"}, /* 29 */
|
||||||
|
{0x092a, CONSONANT, "p"}, /* 30 */
|
||||||
|
{0x092c, CONSONANT, "b"}, /* 31 */
|
||||||
|
{0x092e, CONSONANT, "m"}, /* 32 */
|
||||||
|
{0x0935, CONSONANT, "v"}, /* 33 */
|
||||||
|
{0x0933, CONSONANT, "l"},
|
||||||
|
|
||||||
|
/* Codas */
|
||||||
|
{0x0902, CODA, "m"}, /* anusvara */
|
||||||
|
{0x0903, CODA, ""}, /* visarga */
|
||||||
|
{0x093d, CODA, "'"}, /* avagraha */
|
||||||
|
{0x0901, CODA, "m"}, /* candrabindu */
|
||||||
|
|
||||||
|
/* Numbers */
|
||||||
|
{0x0966, NUMBER, "0"},
|
||||||
|
{0x0967, NUMBER, "1"},
|
||||||
|
{0x0968, NUMBER, "2"},
|
||||||
|
{0x0969, NUMBER, "3"},
|
||||||
|
{0x096a, NUMBER, "4"},
|
||||||
|
{0x096b, NUMBER, "5"},
|
||||||
|
{0x096c, NUMBER, "6"},
|
||||||
|
{0x096d, NUMBER, "7"},
|
||||||
|
{0x096e, NUMBER, "8"},
|
||||||
|
{0x096f, NUMBER, "9"},
|
||||||
|
|
||||||
|
/* Diacritic modifiers */
|
||||||
|
{0x0948, VOWEL_SIGN, "ai"},
|
||||||
|
{0x094c, VOWEL_SIGN, "au"},
|
||||||
|
{0x093e, VOWEL_SIGN, "á"},
|
||||||
|
{0x093f, VOWEL_SIGN, "i"},
|
||||||
|
{0x0940, VOWEL_SIGN, "í"},
|
||||||
|
{0x0941, VOWEL_SIGN, "u"},
|
||||||
|
{0x0942, VOWEL_SIGN, "ú"},
|
||||||
|
{0x0943, VOWEL_SIGN, "r"},
|
||||||
|
{0x0944, VOWEL_SIGN, "r"},
|
||||||
|
{0x0962, VOWEL_SIGN, "l"},
|
||||||
|
{0x0963, VOWEL_SIGN, "l"},
|
||||||
|
{0x0947, VOWEL_SIGN, "é"},
|
||||||
|
{0x094b, VOWEL_SIGN, "ó"},
|
||||||
|
{0x094d, VOWEL_SIGN, ""}, /* virama */
|
||||||
|
};
|
||||||
|
|
||||||
static inline int is_consonant(unsigned int c)
|
static inline int is_consonant(unsigned int c)
|
||||||
{
|
{
|
||||||
return (c >= 0x0915 && c <= 0x0939);
|
return (c >= 0x0915 && c <= 0x0939);
|
||||||
|
@ -67,13 +159,14 @@ static void end_of_word_filter(char *latin, unsigned int *pos,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
static struct translit_letter *letter_by_code(unsigned int c)
|
||||||
unsigned int c)
|
|
||||||
{
|
{
|
||||||
while (table->code != 0) {
|
unsigned int i;
|
||||||
if (table->code == c)
|
|
||||||
return table;
|
for (i = 0; i < ARRAY_SIZE(table); i++) {
|
||||||
table++;
|
if (table[i].code == c) {
|
||||||
|
return table + i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -81,13 +174,11 @@ static struct translit_letter *letter_by_code(struct translit_letter *table,
|
||||||
|
|
||||||
int transcript_devanagari_to_czech(const char *devanagari, char **ret)
|
int transcript_devanagari_to_czech(const char *devanagari, char **ret)
|
||||||
{
|
{
|
||||||
struct translit_letter *table, *letter;
|
struct translit_letter *letter;
|
||||||
unsigned int c, prev = 0, alloc = 0, done = 0, len;
|
unsigned int c, prev = 0, alloc = 0, done = 0, len;
|
||||||
const char *src = devanagari;
|
const char *src = devanagari;
|
||||||
char *latin = NULL;
|
char *latin = NULL;
|
||||||
|
|
||||||
table = get_iast_czech_transliteration_table();
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if (alloc < done + UNICODE_MAX_LENGTH) {
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
||||||
latin = realloc(latin, alloc + CHUNKSIZE);
|
latin = realloc(latin, alloc + CHUNKSIZE);
|
||||||
|
@ -105,7 +196,7 @@ int transcript_devanagari_to_czech(const char *devanagari, char **ret)
|
||||||
return EHINDI;
|
return EHINDI;
|
||||||
}
|
}
|
||||||
|
|
||||||
letter = letter_by_code(table, c);
|
letter = letter_by_code(c);
|
||||||
if (letter) {
|
if (letter) {
|
||||||
switch (letter->type) {
|
switch (letter->type) {
|
||||||
case CONSONANT:
|
case CONSONANT:
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/* https://en.wikipedia.org/wiki/IAST */
|
||||||
|
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
#include "transliteration.h"
|
#include "transliteration.h"
|
||||||
#include "iast.h"
|
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
|
|
||||||
#define SCHWA_CHARACTER 'a'
|
#define SCHWA_CHARACTER 'a'
|
||||||
|
@ -10,13 +10,106 @@
|
||||||
#define NUKTA 0x093c
|
#define NUKTA 0x093c
|
||||||
#define CHUNKSIZE 1024
|
#define CHUNKSIZE 1024
|
||||||
|
|
||||||
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
static struct translit_letter table[] = {
|
||||||
unsigned int c)
|
|
||||||
|
/* Special characters */
|
||||||
|
{0x0950, SPECIAL, "aum"}, /* aum */
|
||||||
|
|
||||||
|
/* Vowels */
|
||||||
|
{0x0910, VOWEL, "ai"}, /* 01 */
|
||||||
|
{0x0914, VOWEL, "au"}, /* 02 */
|
||||||
|
{0x0905, VOWEL, "a"}, /* 03 */
|
||||||
|
{0x0906, VOWEL, "\u0101"}, /* 04 (aa) */
|
||||||
|
{0x0907, VOWEL, "i"}, /* 05 */
|
||||||
|
{0x0908, VOWEL, "\u012b"}, /* 06 (ii) */
|
||||||
|
{0x0909, VOWEL, "u"}, /* 07 */
|
||||||
|
{0x090a, VOWEL, "\u016b"}, /* 08 (uu) */
|
||||||
|
{0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */
|
||||||
|
{0x0960, VOWEL, "\u1e5d"}, /* 10 (.rr) */
|
||||||
|
{0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */
|
||||||
|
{0x0961, VOWEL, "\u1e39"}, /* 12 (.ll) */
|
||||||
|
{0x090f, VOWEL, "e"}, /* 13 */
|
||||||
|
{0x0913, VOWEL, "o"}, /* 14 */
|
||||||
|
|
||||||
|
/* Consonants */
|
||||||
|
{0x0916, CONSONANT, "kh"}, /* 01 */
|
||||||
|
{0x0918, CONSONANT, "gh"}, /* 02 */
|
||||||
|
{0x091b, CONSONANT, "ch"}, /* 03 */
|
||||||
|
{0x091d, CONSONANT, "jh"}, /* 04 */
|
||||||
|
{0x0920, CONSONANT, "\u1e6dh"}, /* 05 (.th) */
|
||||||
|
{0x0922, CONSONANT, "\u1e0dh"}, /* 06 (.dh) */
|
||||||
|
{0x0925, CONSONANT, "th"}, /* 07 */
|
||||||
|
{0x0927, CONSONANT, "dh"}, /* 08 */
|
||||||
|
{0x092b, CONSONANT, "ph"}, /* 09 */
|
||||||
|
{0x092d, CONSONANT, "bh"}, /* 10 */
|
||||||
|
{0x0915, CONSONANT, "k"}, /* 11 */
|
||||||
|
{0x0917, CONSONANT, "g"}, /* 12 */
|
||||||
|
{0x0919, CONSONANT, "\u1e45"}, /* 13 ("n) */
|
||||||
|
{0x0939, CONSONANT, "h"}, /* 14 */
|
||||||
|
{0x091a, CONSONANT, "c"}, /* 15 */
|
||||||
|
{0x091c, CONSONANT, "j"}, /* 16 */
|
||||||
|
{0x091e, CONSONANT, "\u00f1"}, /* 17 (~n) */
|
||||||
|
{0x092f, CONSONANT, "y"}, /* 18 */
|
||||||
|
{0x0936, CONSONANT, "\u015b"}, /* 19 ("s) */
|
||||||
|
{0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */
|
||||||
|
{0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */
|
||||||
|
{0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */
|
||||||
|
{0x0930, CONSONANT, "r"}, /* 23 */
|
||||||
|
{0x0937, CONSONANT, "\u1e63"}, /* 24 (.s) */
|
||||||
|
{0x0924, CONSONANT, "t"}, /* 25 */
|
||||||
|
{0x0926, CONSONANT, "d"}, /* 26 */
|
||||||
|
{0x0928, CONSONANT, "n"}, /* 27 */
|
||||||
|
{0x0932, CONSONANT, "l"}, /* 28 */
|
||||||
|
{0x0938, CONSONANT, "s"}, /* 29 */
|
||||||
|
{0x092a, CONSONANT, "p"}, /* 30 */
|
||||||
|
{0x092c, CONSONANT, "b"}, /* 31 */
|
||||||
|
{0x092e, CONSONANT, "m"}, /* 32 */
|
||||||
|
{0x0935, CONSONANT, "v"}, /* 33 */
|
||||||
|
{0x0933, CONSONANT, "\u1e37"}, /* (.l) */
|
||||||
|
|
||||||
|
/* Codas */
|
||||||
|
{0x0902, CODA, "\u1e43"}, /* anusvara (.m) */
|
||||||
|
{0x0903, CODA, "\u1e25"}, /* visarga (.h) */
|
||||||
|
{0x093d, CODA, "'"}, /* avagraha (') */
|
||||||
|
{0x0901, CODA, "m\u0310"}, /* candrabindu */
|
||||||
|
|
||||||
|
/* Numbers */
|
||||||
|
{0x0966, NUMBER, "0"},
|
||||||
|
{0x0967, NUMBER, "1"},
|
||||||
|
{0x0968, NUMBER, "2"},
|
||||||
|
{0x0969, NUMBER, "3"},
|
||||||
|
{0x096a, NUMBER, "4"},
|
||||||
|
{0x096b, NUMBER, "5"},
|
||||||
|
{0x096c, NUMBER, "6"},
|
||||||
|
{0x096d, NUMBER, "7"},
|
||||||
|
{0x096e, NUMBER, "8"},
|
||||||
|
{0x096f, NUMBER, "9"},
|
||||||
|
|
||||||
|
/* Diacritic modifiers */
|
||||||
|
{0x0948, VOWEL_SIGN, "ai"},
|
||||||
|
{0x094c, VOWEL_SIGN, "au"},
|
||||||
|
{0x093e, VOWEL_SIGN, "\u0101"}, /* (aa) */
|
||||||
|
{0x093f, VOWEL_SIGN, "i"},
|
||||||
|
{0x0940, VOWEL_SIGN, "\u012b"}, /* (ii) */
|
||||||
|
{0x0941, VOWEL_SIGN, "u"},
|
||||||
|
{0x0942, VOWEL_SIGN, "\u016b"}, /* (uu) */
|
||||||
|
{0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */
|
||||||
|
{0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.rr) */
|
||||||
|
{0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */
|
||||||
|
{0x0963, VOWEL_SIGN, "\u1e39"}, /* (.ll) */
|
||||||
|
{0x0947, VOWEL_SIGN, "e"},
|
||||||
|
{0x094b, VOWEL_SIGN, "o"},
|
||||||
|
{0x094d, VOWEL_SIGN, ""}, /* virama */
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct translit_letter *letter_by_code(unsigned int c)
|
||||||
{
|
{
|
||||||
while (table->code != 0) {
|
unsigned int i;
|
||||||
if (table->code == c)
|
|
||||||
return table;
|
for (i = 0; i < ARRAY_SIZE(table); i++) {
|
||||||
table++;
|
if (table[i].code == c) {
|
||||||
|
return table + i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -24,13 +117,11 @@ static struct translit_letter *letter_by_code(struct translit_letter *table,
|
||||||
|
|
||||||
int transliterate_devanagari_to_latin(const char *devanagari, char **ret)
|
int transliterate_devanagari_to_latin(const char *devanagari, char **ret)
|
||||||
{
|
{
|
||||||
struct translit_letter *table, *letter;
|
struct translit_letter *letter;
|
||||||
unsigned int c, alloc = 0, done = 0, len;
|
unsigned int c, alloc = 0, done = 0, len;
|
||||||
const char *src = devanagari;
|
const char *src = devanagari;
|
||||||
char *latin = NULL;
|
char *latin = NULL;
|
||||||
|
|
||||||
table = get_iast_transliteration_table();
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if (alloc < done + UNICODE_MAX_LENGTH) {
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
||||||
latin = realloc(latin, alloc + CHUNKSIZE);
|
latin = realloc(latin, alloc + CHUNKSIZE);
|
||||||
|
@ -46,7 +137,7 @@ int transliterate_devanagari_to_latin(const char *devanagari, char **ret)
|
||||||
return EHINDI;
|
return EHINDI;
|
||||||
}
|
}
|
||||||
|
|
||||||
letter = letter_by_code(table, c);
|
letter = letter_by_code(c);
|
||||||
if (letter) {
|
if (letter) {
|
||||||
switch (letter->type) {
|
switch (letter->type) {
|
||||||
case CONSONANT:
|
case CONSONANT:
|
||||||
|
@ -78,28 +169,30 @@ int transliterate_devanagari_to_latin(const char *devanagari, char **ret)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct translit_letter *letter_by_data(struct translit_letter *table,
|
static struct translit_letter *letter_by_data(const char *data)
|
||||||
const char *data)
|
|
||||||
{
|
{
|
||||||
while (table->code != 0) {
|
unsigned int i, len;
|
||||||
unsigned int len = strlen(table->data);
|
|
||||||
if (len && strncmp(table->data, data, len) == 0)
|
for (i = 0; i < ARRAY_SIZE(table); i++) {
|
||||||
return table;
|
len = strlen(table[i].data);
|
||||||
table++;
|
if (len && strncmp(table[i].data, data, len) == 0) {
|
||||||
|
return table + i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct translit_letter *vowel_sign_by_data(struct translit_letter *table,
|
static struct translit_letter *vowel_sign_by_data(const char *data)
|
||||||
const char *data)
|
|
||||||
{
|
{
|
||||||
while (table->code != 0) {
|
unsigned int i, len;
|
||||||
unsigned int len = strlen(table->data);
|
|
||||||
if (len && strncmp(table->data, data, len) == 0 &&
|
for (i = 0; i < ARRAY_SIZE(table); i++) {
|
||||||
table->type == VOWEL_SIGN)
|
len = strlen(table[i].data);
|
||||||
return table;
|
if (len && strncmp(table[i].data, data, len) == 0 &&
|
||||||
table++;
|
table[i].type == VOWEL_SIGN) {
|
||||||
|
return table + i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -107,13 +200,11 @@ static struct translit_letter *vowel_sign_by_data(struct translit_letter *table,
|
||||||
|
|
||||||
int transliterate_latin_to_devanagari(const char *latin, char **ret)
|
int transliterate_latin_to_devanagari(const char *latin, char **ret)
|
||||||
{
|
{
|
||||||
struct translit_letter *table, *letter, *next;
|
struct translit_letter *letter, *next;
|
||||||
unsigned int alloc = 0, done = 0, len;
|
unsigned int alloc = 0, done = 0, len;
|
||||||
const char *src = latin;
|
const char *src = latin;
|
||||||
char *devanagari = NULL;
|
char *devanagari = NULL;
|
||||||
|
|
||||||
table = get_iast_transliteration_table();
|
|
||||||
|
|
||||||
while (*src) {
|
while (*src) {
|
||||||
if (alloc < done + UNICODE_MAX_LENGTH) {
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
||||||
devanagari = realloc(devanagari, alloc + CHUNKSIZE);
|
devanagari = realloc(devanagari, alloc + CHUNKSIZE);
|
||||||
|
@ -122,7 +213,7 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret)
|
||||||
|
|
||||||
/* consonant (.l) */
|
/* consonant (.l) */
|
||||||
if (strncmp(src, "\u1e37", 3) == 0) {
|
if (strncmp(src, "\u1e37", 3) == 0) {
|
||||||
letter = letter_by_data(table, src + 3);
|
letter = letter_by_data(src + 3);
|
||||||
if (letter && letter->type == VOWEL) {
|
if (letter && letter->type == VOWEL) {
|
||||||
utf8_pack_char(devanagari + done, 0x0933);
|
utf8_pack_char(devanagari + done, 0x0933);
|
||||||
done += 3;
|
done += 3;
|
||||||
|
@ -139,7 +230,7 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
letter = letter_by_data(table, src);
|
letter = letter_by_data(src);
|
||||||
if (letter) {
|
if (letter) {
|
||||||
utf8_pack_char(devanagari + done, letter->code);
|
utf8_pack_char(devanagari + done, letter->code);
|
||||||
len = utf8_char_length(letter->code);
|
len = utf8_char_length(letter->code);
|
||||||
|
@ -149,7 +240,7 @@ int transliterate_latin_to_devanagari(const char *latin, char **ret)
|
||||||
if (letter->type == VOWEL || letter->type == CODA)
|
if (letter->type == VOWEL || letter->type == CODA)
|
||||||
continue;
|
continue;
|
||||||
encode_vowel_modifier:
|
encode_vowel_modifier:
|
||||||
next = vowel_sign_by_data(table, src);
|
next = vowel_sign_by_data(src);
|
||||||
if (next) {
|
if (next) {
|
||||||
utf8_pack_char(devanagari + done, next->code);
|
utf8_pack_char(devanagari + done, next->code);
|
||||||
done += utf8_char_length(next->code);
|
done += utf8_char_length(next->code);
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
#include "velthuis.h"
|
#include "velthuis.h"
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
|
|
||||||
#define ARRAY_SIZE(a) sizeof(a) / sizeof(*a)
|
|
||||||
|
|
||||||
struct encoder_tuple {
|
struct encoder_tuple {
|
||||||
const char *from;
|
const char *from;
|
||||||
const char *to;
|
const char *to;
|
||||||
|
|
Loading…
Reference in a new issue