From 5f68c20f83771759db807164bdf73cbd67e66ffc Mon Sep 17 00:00:00 2001 From: Vlasta Vesely Date: Fri, 16 Oct 2020 09:21:11 +0200 Subject: [PATCH] update docs --- iast.1 | 37 ++++++++++++++++++++++--------------- iast.c | 24 ++++++++++++------------ 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/iast.1 b/iast.1 index 71ad1e4..7f9edcc 100644 --- a/iast.1 +++ b/iast.1 @@ -1,4 +1,4 @@ -.TH "iast" "1" "25 September 2020" "sanskrit-iast" "Sanskrit Transliteration" +.TH "iast" "1" "16 October 2020" "sanskrit-iast" "Sanskrit Transliteration" .SH NAME .B iast @@ -49,7 +49,7 @@ Transcript a Devanagari text into Czech. .B \-e .RS 4 -Convert a symbolic ASCII(7) text to IAST representation. +Convert Velthuis scheme text to IAST representation. .RE .B \-h @@ -64,20 +64,17 @@ Show version number and exit. .SH ENCODING -When the flag \fB-e\fR is set on, the program converts purely ASCII-encoded strings -into the special characters of the IAST alphabet. For example, it converts -‘sam.skr.tam’ to ‘saṃskṛtam’ or ‘s,a-stram’ to ‘śāstram’. +When the flag \fB-e\fR is set, the program converts strings encoded using +the Velthuis scheme (purely ASCII-encoded strings) into the special characters +of the IAST alphabet. For example, it can convert ‘sa.msk.rtam’ to ‘saṃskṛtam’ +or ‘"saastram’ to ‘śāstram’. -The encoding scheme is based on the following principle: characters ‘-’, ‘.’, -‘,’ and ‘~’ are considered to be modifiers that modify a letter that stands -before the modifier. For example, if the string to be encoded contains ‘a-’, -it will be encoded as ‘ā’ and the sequence ‘n~’ as ‘ñ’ and so on. The only -exception from the rule is the letter ‘n’ because it can be encoded both with -an over-dot (ṅ) and under-dot (ṇ). For the over-dot variant, the conversion -sequence is ‘n^.’. - -Alternatively, the Czech characters ‘á’, ‘í’, ‘ú’, ‘š’ and ‘ň’ can be used as -shortcuts to encode the letters ‘ā’, ‘ī’, ‘ū’, ‘ś’ and ‘ñ’ respectively. 
+The encoding scheme is based on the following principle: the characters ‘.’, +‘"’ and ‘~’ are considered to be modifiers that modify the letter that stands +after the modifier. For example, if the string to be encoded contains ‘.t’, +it will be encoded as ‘ṭ’, the sequence ‘~n’ as ‘ñ’ and so on. Long vowels are +marked by a doubled letter corresponding to the vowel; thus ‘aa’ is encoded as +‘ā’, ‘.rr’ as ‘ṝ’ and so on. .SH AUTHOR @@ -104,3 +101,13 @@ Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + +.SH SEE ALSO +More information on the encoding schemes can be found on Wikipedia: + +.RS 4 +IAST scheme: +.br +Velthuis scheme: +.RE diff --git a/iast.c b/iast.c index 7a7f268..1df5572 100644 --- a/iast.c +++ b/iast.c @@ -13,15 +13,15 @@ static struct translit_letter table[] = { {0x0910, VOWEL, "ai"}, /* 01 */ {0x0914, VOWEL, "au"}, /* 02 */ {0x0905, VOWEL, "a"}, /* 03 */ - {0x0906, VOWEL, "\u0101"}, /* 04 (a-) */ + {0x0906, VOWEL, "\u0101"}, /* 04 (aa) */ {0x0907, VOWEL, "i"}, /* 05 */ - {0x0908, VOWEL, "\u012b"}, /* 06 (i-) */ + {0x0908, VOWEL, "\u012b"}, /* 06 (ii) */ {0x0909, VOWEL, "u"}, /* 07 */ - {0x090a, VOWEL, "\u016b"}, /* 08 (u-) */ + {0x090a, VOWEL, "\u016b"}, /* 08 (uu) */ {0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */ - {0x0960, VOWEL, "\u1e5d"}, /* 10 (.r-) */ + {0x0960, VOWEL, "\u1e5d"}, /* 10 (.rr) */ {0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */ - {0x0961, VOWEL, "\u1e39"}, /* 12 (.l-) */ + {0x0961, VOWEL, "\u1e39"}, /* 12 (.ll) */ {0x090f, VOWEL, "e"}, /* 13 */ {0x0913, VOWEL, "o"}, /* 14 */ @@ -38,13 +38,13 @@ static struct translit_letter table[] = { {0x092d, CONSONANT, "bh"}, /* 10 */ {0x0915, CONSONANT, "k"}, /* 11 */ {0x0917, CONSONANT, "g"}, /* 12 */ - {0x0919, CONSONANT, "\u1e45"}, /* 13 (n.) 
*/ + {0x0919, CONSONANT, "\u1e45"}, /* 13 ("n) */ {0x0939, CONSONANT, "h"}, /* 14 */ {0x091a, CONSONANT, "c"}, /* 15 */ {0x091c, CONSONANT, "j"}, /* 16 */ {0x091e, CONSONANT, "\u00f1"}, /* 17 (n~) */ {0x092f, CONSONANT, "y"}, /* 18 */ - {0x0936, CONSONANT, "\u015b"}, /* 19 (s,) */ + {0x0936, CONSONANT, "\u015b"}, /* 19 ("s) */ {0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */ {0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */ {0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */ @@ -82,15 +82,15 @@ static struct translit_letter table[] = { /* Diacritic modifiers */ {0x0948, VOWEL_SIGN, "ai"}, {0x094c, VOWEL_SIGN, "au"}, - {0x093e, VOWEL_SIGN, "\u0101"}, /* (a-) */ + {0x093e, VOWEL_SIGN, "\u0101"}, /* (aa) */ {0x093f, VOWEL_SIGN, "i"}, - {0x0940, VOWEL_SIGN, "\u012b"}, /* (i-) */ + {0x0940, VOWEL_SIGN, "\u012b"}, /* (ii) */ {0x0941, VOWEL_SIGN, "u"}, - {0x0942, VOWEL_SIGN, "\u016b"}, /* (u-) */ + {0x0942, VOWEL_SIGN, "\u016b"}, /* (uu) */ {0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */ - {0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.r-) */ + {0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.rr) */ {0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */ - {0x0963, VOWEL_SIGN, "\u1e39"}, /* (.l-) */ + {0x0963, VOWEL_SIGN, "\u1e39"}, /* (.ll) */ {0x0947, VOWEL_SIGN, "e"}, {0x094b, VOWEL_SIGN, "o"}, {0x094d, VOWEL_SIGN, ""}, /* virama */