update docs

This commit is contained in:
Vlasta Vesely 2020-10-16 09:21:11 +02:00
parent 3bb8a46732
commit 5f68c20f83
2 changed files with 34 additions and 27 deletions

37
iast.1
View file

@ -1,4 +1,4 @@
.TH "iast" "1" "25 September 2020" "sanskrit-iast" "Sanskrit Transliteration" .TH "iast" "1" "16 October 2020" "sanskrit-iast" "Sanskrit Transliteration"
.SH NAME .SH NAME
.B iast .B iast
@ -49,7 +49,7 @@ Transcript a Devanagari text into Czech.
.B \-e .B \-e
.RS 4 .RS 4
Convert a symbolic ASCII(7) text to IAST representation. Convert Velthuis scheme text to IAST representation.
.RE .RE
.B \-h .B \-h
@ -64,20 +64,17 @@ Show version number and exit.
.SH ENCODING .SH ENCODING
When the flag \fB-e\fR is set on, the program converts purely ASCII-encoded strings When the flag \fB\-e\fR is set, the program converts strings encoded using
into the special characters of the IAST alphabet. For example, it converts the Velthuis scheme (purely ASCII-encoded strings) into the special characters
sam.skr.tam to saṃskṛtam or s,a-stram to śāstram. of the IAST alphabet. For example, it can convert sa.msk.rtam to saṃskṛtam
or "saastram to śāstram.
The encoding scheme is based on the following principle: characters -, ., The encoding scheme is based on the following principle: the characters .,
, and ~ are considered to be modifiers that modify a letter that stands " and ~ are considered to be modifiers that modify the letter that stands
before the modifier. For example, if the string to be encoded contains a-, after the modifier. For example, if the string to be encoded contains .t,
it will be encoded as ‘ā’ and the sequence n~ as ‘ñ’ and so on. The only it will be encoded as ‘ṭ’, the sequence ~n as ‘ñ’ and so on. Long vowels are
exception from the rule is the letter n because it can be encoded both with marked by a doubled letter corresponding to the vowel; thus aa is encoded as
an over-dot (ṅ) and under-dot (ṇ). For the over-dot variant, the conversion ‘ā’, .rr as ‘ṝ’ and so on.
sequence is n^..
Alternatively, the Czech characters ‘á’, ‘í’, ‘ú’, ‘š’ and ‘ň’ can be used as
shortcuts to encode the letters ‘ā’, ‘ī’, ‘ū’, ‘ś’ and ‘ñ’ respectively.
.SH AUTHOR .SH AUTHOR
@ -104,3 +101,13 @@ Free Software Foundation.
This program is distributed in the hope that it will be useful, but WITHOUT This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
.SH SEE ALSO
More information on the encoding schemes can be found on Wikipedia:
.RS 4
IAST scheme: <https://en.wikipedia.org/wiki/IAST>
.br
Velthuis scheme: <https://en.wikipedia.org/wiki/Velthuis>
.RE

24
iast.c
View file

@ -13,15 +13,15 @@ static struct translit_letter table[] = {
{0x0910, VOWEL, "ai"}, /* 01 */ {0x0910, VOWEL, "ai"}, /* 01 */
{0x0914, VOWEL, "au"}, /* 02 */ {0x0914, VOWEL, "au"}, /* 02 */
{0x0905, VOWEL, "a"}, /* 03 */ {0x0905, VOWEL, "a"}, /* 03 */
{0x0906, VOWEL, "\u0101"}, /* 04 (a-) */ {0x0906, VOWEL, "\u0101"}, /* 04 (aa) */
{0x0907, VOWEL, "i"}, /* 05 */ {0x0907, VOWEL, "i"}, /* 05 */
{0x0908, VOWEL, "\u012b"}, /* 06 (i-) */ {0x0908, VOWEL, "\u012b"}, /* 06 (ii) */
{0x0909, VOWEL, "u"}, /* 07 */ {0x0909, VOWEL, "u"}, /* 07 */
{0x090a, VOWEL, "\u016b"}, /* 08 (u-) */ {0x090a, VOWEL, "\u016b"}, /* 08 (uu) */
{0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */ {0x090b, VOWEL, "\u1e5b"}, /* 09 (.r) */
{0x0960, VOWEL, "\u1e5d"}, /* 10 (.r-) */ {0x0960, VOWEL, "\u1e5d"}, /* 10 (.rr) */
{0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */ {0x090c, VOWEL, "\u1e37"}, /* 11 (.l) */
{0x0961, VOWEL, "\u1e39"}, /* 12 (.l-) */ {0x0961, VOWEL, "\u1e39"}, /* 12 (.ll) */
{0x090f, VOWEL, "e"}, /* 13 */ {0x090f, VOWEL, "e"}, /* 13 */
{0x0913, VOWEL, "o"}, /* 14 */ {0x0913, VOWEL, "o"}, /* 14 */
@ -38,13 +38,13 @@ static struct translit_letter table[] = {
{0x092d, CONSONANT, "bh"}, /* 10 */ {0x092d, CONSONANT, "bh"}, /* 10 */
{0x0915, CONSONANT, "k"}, /* 11 */ {0x0915, CONSONANT, "k"}, /* 11 */
{0x0917, CONSONANT, "g"}, /* 12 */ {0x0917, CONSONANT, "g"}, /* 12 */
{0x0919, CONSONANT, "\u1e45"}, /* 13 (n.) */ {0x0919, CONSONANT, "\u1e45"}, /* 13 ("n) */
{0x0939, CONSONANT, "h"}, /* 14 */ {0x0939, CONSONANT, "h"}, /* 14 */
{0x091a, CONSONANT, "c"}, /* 15 */ {0x091a, CONSONANT, "c"}, /* 15 */
{0x091c, CONSONANT, "j"}, /* 16 */ {0x091c, CONSONANT, "j"}, /* 16 */
{0x091e, CONSONANT, "\u00f1"}, /* 17 (n~) */ {0x091e, CONSONANT, "\u00f1"}, /* 17 (n~) */
{0x092f, CONSONANT, "y"}, /* 18 */ {0x092f, CONSONANT, "y"}, /* 18 */
{0x0936, CONSONANT, "\u015b"}, /* 19 (s,) */ {0x0936, CONSONANT, "\u015b"}, /* 19 ("s) */
{0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */ {0x091f, CONSONANT, "\u1e6d"}, /* 20 (.t) */
{0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */ {0x0921, CONSONANT, "\u1e0d"}, /* 21 (.d) */
{0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */ {0x0923, CONSONANT, "\u1e47"}, /* 22 (.n) */
@ -82,15 +82,15 @@ static struct translit_letter table[] = {
/* Diacritic modifiers */ /* Diacritic modifiers */
{0x0948, VOWEL_SIGN, "ai"}, {0x0948, VOWEL_SIGN, "ai"},
{0x094c, VOWEL_SIGN, "au"}, {0x094c, VOWEL_SIGN, "au"},
{0x093e, VOWEL_SIGN, "\u0101"}, /* (a-) */ {0x093e, VOWEL_SIGN, "\u0101"}, /* (aa) */
{0x093f, VOWEL_SIGN, "i"}, {0x093f, VOWEL_SIGN, "i"},
{0x0940, VOWEL_SIGN, "\u012b"}, /* (i-) */ {0x0940, VOWEL_SIGN, "\u012b"}, /* (ii) */
{0x0941, VOWEL_SIGN, "u"}, {0x0941, VOWEL_SIGN, "u"},
{0x0942, VOWEL_SIGN, "\u016b"}, /* (u-) */ {0x0942, VOWEL_SIGN, "\u016b"}, /* (uu) */
{0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */ {0x0943, VOWEL_SIGN, "\u1e5b"}, /* (.r) */
{0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.r-) */ {0x0944, VOWEL_SIGN, "\u1e5d"}, /* (.rr) */
{0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */ {0x0962, VOWEL_SIGN, "\u1e37"}, /* (.l) */
{0x0963, VOWEL_SIGN, "\u1e39"}, /* (.l-) */ {0x0963, VOWEL_SIGN, "\u1e39"}, /* (.ll) */
{0x0947, VOWEL_SIGN, "e"}, {0x0947, VOWEL_SIGN, "e"},
{0x094b, VOWEL_SIGN, "o"}, {0x094b, VOWEL_SIGN, "o"},
{0x094d, VOWEL_SIGN, ""}, /* virama */ {0x094d, VOWEL_SIGN, ""}, /* virama */