iast table: remove schwas, encode unicode chars
This commit is contained in:
parent
939c58db18
commit
f67fb24333
3 changed files with 93 additions and 90 deletions
162
iast.c
162
iast.c
|
@ -1,99 +1,97 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/* https://en.wikipedia.org/wiki/IAST */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include "compat.h"
|
||||||
|
|
||||||
#include "iast.h"
|
#include "iast.h"
|
||||||
#include "transliteration.h"
|
|
||||||
|
|
||||||
|
static const struct translit_letter table[] = {
|
||||||
static const struct transliteration_letter table[] = {
|
|
||||||
|
|
||||||
/* Vowels */
|
/* Vowels */
|
||||||
{0x0905, FLAG_REGULAR, "a"}, /* अ */ /* 1 */
|
{0x0910, VOWEL, "ai"}, /* 01 */
|
||||||
{0x0906, FLAG_REGULAR, "ā"}, /* आ */ /* 2 */
|
{0x0914, VOWEL, "au"}, /* 02 */
|
||||||
{0x0907, FLAG_REGULAR, "i"}, /* इ */ /* 3 */
|
{0x0905, VOWEL, "a"}, /* 03 */
|
||||||
{0x0908, FLAG_REGULAR, "ī"}, /* ई */ /* 4 */
|
{0x0906, VOWEL, "\u0101"}, /* 04 (a-) */
|
||||||
{0x0909, FLAG_REGULAR, "u"}, /* उ */ /* 5 */
|
{0x0907, VOWEL, "i"}, /* 05 */
|
||||||
{0x090a, FLAG_REGULAR, "ū"}, /* ऊ */ /* 6 */
|
{0x0908, VOWEL, "\u012b"}, /* 06 (i-) */
|
||||||
{0x090b, FLAG_REGULAR, "ṛ"}, /* ऋ */ /* 7 */
|
{0x0909, VOWEL, "u"}, /* 07 */
|
||||||
{0x0960, FLAG_REGULAR, "ṝ"}, /* ॠ */ /* 8 */
|
{0x090a, VOWEL, "\u016b"}, /* 08 (u-) */
|
||||||
{0x090c, FLAG_REGULAR, "ḷ"}, /* ऌ */ /* 9 */
|
{0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */
|
||||||
{0x0961, FLAG_REGULAR, "ḹ"}, /* ॡ */ /* 10 */
|
{0x0960, VOWEL, "\u1e5d"}, /* 10 (.r-) */
|
||||||
{0x090f, FLAG_REGULAR, "e"}, /* ए */ /* 11 */
|
{0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */
|
||||||
{0x0910, FLAG_REGULAR, "ai"}, /* ऐ */ /* 12 */
|
{0x0961, VOWEL, "\u1e39"}, /* 12 (.l-) */
|
||||||
{0x0913, FLAG_REGULAR, "o"}, /* ओ */ /* 13 */
|
{0x090f, VOWEL, "e"}, /* 13 */
|
||||||
{0x0914, FLAG_REGULAR, "au"}, /* औ */ /* 14 */
|
{0x0913, VOWEL, "o"}, /* 14 */
|
||||||
|
|
||||||
/* Consonants */
|
/* Consonants */
|
||||||
{0x0915, FLAG_REGULAR, "ka"}, /* क */ /* 1 */
|
{0x0916, CONSONANT, "kh"}, /* 01 */
|
||||||
{0x0916, FLAG_REGULAR, "kha"}, /* ख */ /* 2 */
|
{0x0918, CONSONANT, "gh"}, /* 02 */
|
||||||
{0x0917, FLAG_REGULAR, "ga"}, /* ग */ /* 3 */
|
{0x091b, CONSONANT, "ch"}, /* 03 */
|
||||||
{0x0918, FLAG_REGULAR, "gha"}, /* घ */ /* 4 */
|
{0x091d, CONSONANT, "jh"}, /* 04 */
|
||||||
{0x0919, FLAG_REGULAR, "ṅa"}, /* ङ */ /* 5 */
|
{0x0920, CONSONANT, "\u1e6dh"}, /* 05 (.th) */
|
||||||
{0x0939, FLAG_REGULAR, "ha"}, /* ह */ /* 6 */
|
{0x0922, CONSONANT, "\u1e0dh"}, /* 06 (.dh) */
|
||||||
{0x091a, FLAG_REGULAR, "ca"}, /* च */ /* 7 */
|
{0x0925, CONSONANT, "th"}, /* 07 */
|
||||||
{0x091b, FLAG_REGULAR, "cha"}, /* छ */ /* 8 */
|
{0x0927, CONSONANT, "dh"}, /* 08 */
|
||||||
{0x091c, FLAG_REGULAR, "ja"}, /* ज */ /* 9 */
|
{0x092b, CONSONANT, "ph"}, /* 09 */
|
||||||
{0x091d, FLAG_REGULAR, "jha"}, /* झ */ /* 10 */
|
{0x092d, CONSONANT, "bh"}, /* 10 */
|
||||||
{0x091e, FLAG_REGULAR, "ña"}, /* ञ */ /* 11 */
|
{0x0915, CONSONANT, "k"}, /* 11 */
|
||||||
{0x092f, FLAG_REGULAR, "ya"}, /* य */ /* 12 */
|
{0x0917, CONSONANT, "g"}, /* 12 */
|
||||||
{0x0936, FLAG_REGULAR, "śa"}, /* श */ /* 13 */
|
{0x0919, CONSONANT, "\u1e45"}, /* 13 (n.) */
|
||||||
{0x091F, FLAG_REGULAR, "ṭa"}, /* ट */ /* 14 */
|
{0x0939, CONSONANT, "h"}, /* 14 */
|
||||||
{0x0920, FLAG_REGULAR, "ṭha"}, /* ठ */ /* 15 */
|
{0x091a, CONSONANT, "c"}, /* 15 */
|
||||||
{0x0921, FLAG_REGULAR, "ḍa"}, /* ड */ /* 16 */
|
{0x091c, CONSONANT, "j"}, /* 16 */
|
||||||
{0x0922, FLAG_REGULAR, "ḍha"}, /* ढ */ /* 17 */
|
{0x091e, CONSONANT, "\u00f1"}, /* 17 (n~) */
|
||||||
{0x0923, FLAG_REGULAR, "ṇa"}, /* ण */ /* 18 */
|
{0x092f, CONSONANT, "y"}, /* 18 */
|
||||||
{0x0930, FLAG_REGULAR, "ra"}, /* र */ /* 19 */
|
{0x0936, CONSONANT, "\u015b"}, /* 19 (s,) */
|
||||||
{0x0937, FLAG_REGULAR, "ṣa"}, /* ष */ /* 20 */
|
{0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */
|
||||||
{0x0924, FLAG_REGULAR, "ta"}, /* त */ /* 21 */
|
{0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */
|
||||||
{0x0925, FLAG_REGULAR, "tha"}, /* थ */ /* 22 */
|
{0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */
|
||||||
{0x0926, FLAG_REGULAR, "da"}, /* द */ /* 23 */
|
{0x0930, CONSONANT, "r"}, /* 23 */
|
||||||
{0x0927, FLAG_REGULAR, "dha"}, /* ध */ /* 24 */
|
{0x0937, CONSONANT, "\u1e63"}, /* 24 (.s) */
|
||||||
{0x0928, FLAG_REGULAR, "na"}, /* न */ /* 25 */
|
{0x0924, CONSONANT, "t"}, /* 25 */
|
||||||
{0x0932, FLAG_REGULAR, "la"}, /* ल */ /* 26 */
|
{0x0926, CONSONANT, "d"}, /* 26 */
|
||||||
{0x0938, FLAG_REGULAR, "sa"}, /* स */ /* 27 */
|
{0x0928, CONSONANT, "n"}, /* 27 */
|
||||||
{0x092a, FLAG_REGULAR, "pa"}, /* प */ /* 28 */
|
{0x0932, CONSONANT, "l"}, /* 28 */
|
||||||
{0x092b, FLAG_REGULAR, "pha"}, /* फ */ /* 29 */
|
{0x0938, CONSONANT, "s"}, /* 29 */
|
||||||
{0x092c, FLAG_REGULAR, "ba"}, /* ब */ /* 30 */
|
{0x092a, CONSONANT, "p"}, /* 30 */
|
||||||
{0x092d, FLAG_REGULAR, "bha"}, /* भ */ /* 31 */
|
{0x092c, CONSONANT, "b"}, /* 31 */
|
||||||
{0x092e, FLAG_REGULAR, "ma"}, /* म */ /* 32 */
|
{0x092e, CONSONANT, "m"}, /* 32 */
|
||||||
{0x0935, FLAG_REGULAR, "va"}, /* व */ /* 33 */
|
{0x0935, CONSONANT, "v"}, /* 33 */
|
||||||
|
|
||||||
/* Codas */
|
/* Codas */
|
||||||
{0x0902, FLAG_REGULAR, "ṃ"}, /* ं (anusvara) */
|
{0x0902, CODA, "\u1e43"}, /* anusvara (.m) */
|
||||||
{0x0903, FLAG_REGULAR, "ḥ"}, /* ः (visarga) */
|
{0x0903, CODA, "\u1e25"}, /* visarga (.h) */
|
||||||
{0x093d, FLAG_REGULAR, "'"}, /* ऽ (avagraha) */
|
{0x093d, CODA, "'"}, /* avagraha (') */
|
||||||
|
|
||||||
/* Special characters */
|
/* Special characters */
|
||||||
{0x0950, FLAG_REGULAR, "aum"}, /* ॐ */
|
{0x0950, SPECIAL, "aum"}, /* aum */
|
||||||
|
|
||||||
/* Numbers */
|
/* Numbers */
|
||||||
{0x0966, FLAG_REGULAR, "0"},
|
{0x0966, NUMBER, "0"},
|
||||||
{0x0967, FLAG_REGULAR, "1"},
|
{0x0967, NUMBER, "1"},
|
||||||
{0x0968, FLAG_REGULAR, "2"},
|
{0x0968, NUMBER, "2"},
|
||||||
{0x0969, FLAG_REGULAR, "3"},
|
{0x0969, NUMBER, "3"},
|
||||||
{0x096a, FLAG_REGULAR, "4"},
|
{0x096a, NUMBER, "4"},
|
||||||
{0x096b, FLAG_REGULAR, "5"},
|
{0x096b, NUMBER, "5"},
|
||||||
{0x096c, FLAG_REGULAR, "6"},
|
{0x096c, NUMBER, "6"},
|
||||||
{0x096d, FLAG_REGULAR, "7"},
|
{0x096d, NUMBER, "7"},
|
||||||
{0x096e, FLAG_REGULAR, "8"},
|
{0x096e, NUMBER, "8"},
|
||||||
{0x096f, FLAG_REGULAR, "9"},
|
{0x096f, NUMBER, "9"},
|
||||||
|
|
||||||
/* Diacritic modifiers */
|
/* Diacritic modifiers */
|
||||||
{0x093e, FLAG_MODIFIER, "ā"}, /* ा */
|
{0x0948, VOWEL_SIGN, "ai"},
|
||||||
{0x093f, FLAG_MODIFIER, "i"}, /* ि */
|
{0x094c, VOWEL_SIGN, "au"},
|
||||||
{0x0940, FLAG_MODIFIER, "ī"}, /* ी */
|
{0x093e, VOWEL_SIGN, "\u0101"}, /* (a-) */
|
||||||
{0x0941, FLAG_MODIFIER, "u"}, /* ु */
|
{0x093f, VOWEL_SIGN, "i"},
|
||||||
{0x0942, FLAG_MODIFIER, "ū"}, /* ू */
|
{0x0940, VOWEL_SIGN, "\u012b"}, /* (i-) */
|
||||||
{0x0943, FLAG_MODIFIER, "ṛ"}, /* ृ */
|
{0x0941, VOWEL_SIGN, "u"},
|
||||||
{0x0944, FLAG_MODIFIER, "ṝ"}, /* ॄ */
|
{0x0942, VOWEL_SIGN, "\u016b"}, /* (u-) */
|
||||||
{0x0962, FLAG_MODIFIER, "ḷ"}, /* ॢ */
|
{0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */
|
||||||
{0x0963, FLAG_MODIFIER, "ḹ"}, /* ॣ */
|
{0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.r-) */
|
||||||
{0x0947, FLAG_MODIFIER, "e"}, /* े */
|
{0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */
|
||||||
{0x0948, FLAG_MODIFIER, "ai"}, /* ै */
|
{0x0963, VOWEL_SIGN, "\u1e39"}, /* (.l-) */
|
||||||
{0x094b, FLAG_MODIFIER, "o"}, /* ो */
|
{0x0947, VOWEL_SIGN, "e"},
|
||||||
{0x094c, FLAG_MODIFIER, "au"}, /* ौ */
|
{0x094b, VOWEL_SIGN, "o"},
|
||||||
{0x094d, FLAG_MODIFIER, ""}, /* ् (virama) */
|
{0x094d, VOWEL_SIGN, ""}, /* virama */
|
||||||
|
|
||||||
{0, 0, NULL}
|
{0, 0, NULL}
|
||||||
};
|
};
|
||||||
|
@ -102,12 +100,12 @@ static const transliteration_filter_t filters[] = {
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct transliteration_context context = {
|
static struct translit_context context = {
|
||||||
.table = table,
|
.table = table,
|
||||||
.filters = filters
|
.filters = filters
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct transliteration_context *get_iast_transliteration_context()
|
struct translit_context *get_iast_transliteration_context()
|
||||||
{
|
{
|
||||||
return &context;
|
return &context;
|
||||||
}
|
}
|
||||||
|
|
2
iast.h
2
iast.h
|
@ -5,6 +5,6 @@
|
||||||
|
|
||||||
#include "transliteration.h"
|
#include "transliteration.h"
|
||||||
|
|
||||||
const struct transliteration_context *get_iast_transliteration_context();
|
struct translit_context *get_iast_transliteration_context();
|
||||||
|
|
||||||
#endif /* __IAST_H */
|
#endif /* __IAST_H */
|
||||||
|
|
|
@ -5,20 +5,25 @@
|
||||||
|
|
||||||
#include "syllable.h"
|
#include "syllable.h"
|
||||||
|
|
||||||
|
enum translit_letter_type {
|
||||||
#define FLAG_REGULAR 1 << 0
|
VOWEL,
|
||||||
#define FLAG_MODIFIER 1 << 1
|
CONSONANT,
|
||||||
|
CODA,
|
||||||
|
SPECIAL,
|
||||||
|
NUMBER,
|
||||||
|
VOWEL_SIGN
|
||||||
|
};
|
||||||
|
|
||||||
typedef void (*transliteration_filter_t)(struct syllable *syllable_chain);
|
typedef void (*transliteration_filter_t)(struct syllable *syllable_chain);
|
||||||
|
|
||||||
struct transliteration_letter {
|
struct translit_letter {
|
||||||
unsigned int code;
|
unsigned int code;
|
||||||
unsigned int flags;
|
enum translit_letter_type type;
|
||||||
const char *data;
|
const char *data;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct transliteration_context {
|
struct translit_context {
|
||||||
const struct transliteration_letter *table;
|
const struct translit_letter *table;
|
||||||
const transliteration_filter_t *filters;
|
const transliteration_filter_t *filters;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue