/*
 * Review header for this patch. It sits ahead of the first "diff --git"
 * header, so it is commentary only and belongs to no hunk.
 *
 * What the hunks do, file by file:
 *
 *   compat.h           New header: include guard __COMPAT_H around two
 *                      #include directives.
 *                      NOTE(review): both directives have no target in this
 *                      copy; presumably angle-bracket header names were lost
 *                      in extraction. Confirm against the repository.
 *   iast-czech.c/.h    Deleted. Removes the Czech letter table, the
 *                      nasal_consonants_filter and end_of_word_filter
 *                      functions, and the accessor
 *                      get_iast_czech_transliteration_context().
 *   iast.c / iast.h    The letter table loses its const qualifier, the empty
 *                      filter list and the static context object are removed,
 *                      and get_iast_transliteration_context() is replaced by
 *                      get_iast_transliteration_table(), which returns the
 *                      table itself.
 *   main.c             Drops the iast-czech.h include, the local "context"
 *                      variable and its selection by FLAG_CZECH; both call
 *                      sites now pass a single argument to
 *                      transliterate_devanagari_to_latin().
 *   syllable.c/.h      Deleted. Removes struct syllable and syllable_alloc,
 *                      syllable_drop, syllable_chain_length,
 *                      syllable_chain_to_string and syllable_chain_drop.
 *   transliteration.c  Removes find_letter_by_code, syllable_modify and
 *                      apply_transliteration_filters. The new
 *                      transliterate_devanagari_to_latin(devanagari) fetches
 *                      the table, never reads it, and returns strdup("")
 *                      (marked FIXME): with this patch applied every input
 *                      produces an empty string.
 *   transliteration.h  Drops the syllable.h include and the
 *                      transliteration_filter_t typedef, reduces
 *                      struct translit_context to a non-const table pointer,
 *                      and shortens the prototype to one parameter.
 *
 * Open points for the reviewer:
 *   - The strdup("") result is returned unchecked and main.c prints it with
 *     a percent-s format; a failed allocation would hand NULL to fprintf.
 *   - struct translit_context stays declared, but no line visible in this
 *     patch uses it any more. TODO confirm whether it can go as well.
 *   - The removed main.c lines were the only use of FLAG_CZECH visible here;
 *     its definition and option parsing are not shown. TODO check that the
 *     flag is not left behind as a silent no-op.
 *   - In this copy the hunks are run together on a few long lines; one diff
 *     line per text line has to be restored before git apply or patch can
 *     consume it.
 */
diff --git a/compat.h b/compat.h new file mode 100644 index 0000000..d41a770 --- /dev/null +++ b/compat.h @@ -0,0 +1,7 @@ +#ifndef __COMPAT_H +#define __COMPAT_H + +#include +#include + +#endif /* __COMPAT_H */ diff --git a/iast-czech.c b/iast-czech.c deleted file mode 100644 index e2dab57..0000000 --- a/iast-czech.c +++ /dev/null @@ -1,174 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -/* https://cs.wikipedia.org/wiki/Wikipedie:Transkripce_hindštiny */ - -#include -#include -#include - -#include "iast-czech.h" -#include "transliteration.h" - - -static const struct transliteration_letter table[] = { - - /* Vowels */ - {0x0905, FLAG_REGULAR, "a"}, /* अ */ /* 1 */ - {0x0906, FLAG_REGULAR, "á"}, /* आ */ /* 2 */ - {0x0907, FLAG_REGULAR, "i"}, /* इ */ /* 3 */ - {0x0908, FLAG_REGULAR, "í"}, /* ई */ /* 4 */ - {0x0909, FLAG_REGULAR, "u"}, /* उ */ /* 5 */ - {0x090a, FLAG_REGULAR, "ú"}, /* ऊ */ /* 6 */ - {0x090b, FLAG_REGULAR, "r"}, /* ऋ */ /* 7 */ - {0x0960, FLAG_REGULAR, "r"}, /* ॠ */ /* 8 */ - {0x090c, FLAG_REGULAR, "l"}, /* ऌ */ /* 9 */ - {0x0961, FLAG_REGULAR, "l"}, /* ॡ */ /* 10 */ - {0x090f, FLAG_REGULAR, "é"}, /* ए */ /* 11 */ - {0x0910, FLAG_REGULAR, "ai"}, /* ऐ */ /* 12 */ - {0x0913, FLAG_REGULAR, "ó"}, /* ओ */ /* 13 */ - {0x0914, FLAG_REGULAR, "au"}, /* औ */ /* 14 */ - - /* Consonants */ - {0x0915, FLAG_REGULAR, "ka"}, /* क */ /* 1 */ - {0x0916, FLAG_REGULAR, "kha"}, /* ख */ /* 2 */ - {0x0917, FLAG_REGULAR, "ga"}, /* ग */ /* 3 */ - {0x0918, FLAG_REGULAR, "gha"}, /* घ */ /* 4 */ - {0x0919, FLAG_REGULAR, "na"}, /* ङ */ /* 5 */ - {0x0939, FLAG_REGULAR, "ha"}, /* ह */ /* 6 */ - {0x091a, FLAG_REGULAR, "ca"}, /* च */ /* 7 */ - {0x091b, FLAG_REGULAR, "cha"}, /* छ */ /* 8 */ - {0x091c, FLAG_REGULAR, "dža"}, /* ज */ /* 9 */ - {0x091d, FLAG_REGULAR, "džha"}, /* झ */ /* 10 */ - {0x091e, FLAG_REGULAR, "ňa"}, /* ञ */ /* 11 */ - {0x092f, FLAG_REGULAR, "ja"}, /* य */ /* 12 */ - {0x0936, FLAG_REGULAR, "ša"}, /* श */ /* 13 */ - {0x091F, FLAG_REGULAR, "ta"}, /* ट */ /* 14 */ - 
{0x0920, FLAG_REGULAR, "tha"}, /* ठ */ /* 15 */ - {0x0921, FLAG_REGULAR, "da"}, /* ड */ /* 16 */ - {0x0922, FLAG_REGULAR, "dha"}, /* ढ */ /* 17 */ - {0x0923, FLAG_REGULAR, "na"}, /* ण */ /* 18 */ - {0x0930, FLAG_REGULAR, "ra"}, /* र */ /* 19 */ - {0x0937, FLAG_REGULAR, "ša"}, /* श */ /* 20 */ - {0x0924, FLAG_REGULAR, "ta"}, /* त */ /* 21 */ - {0x0925, FLAG_REGULAR, "tha"}, /* थ */ /* 22 */ - {0x0926, FLAG_REGULAR, "da"}, /* द */ /* 23 */ - {0x0927, FLAG_REGULAR, "dha"}, /* ध */ /* 24 */ - {0x0928, FLAG_REGULAR, "na"}, /* न */ /* 25 */ - {0x0932, FLAG_REGULAR, "la"}, /* ल */ /* 26 */ - {0x0938, FLAG_REGULAR, "sa"}, /* स */ /* 27 */ - {0x092a, FLAG_REGULAR, "pa"}, /* प */ /* 28 */ - {0x092b, FLAG_REGULAR, "pha"}, /* फ */ /* 29 */ - {0x092c, FLAG_REGULAR, "ba"}, /* ब */ /* 30 */ - {0x092d, FLAG_REGULAR, "bha"}, /* भ */ /* 31 */ - {0x092e, FLAG_REGULAR, "ma"}, /* म */ /* 32 */ - {0x0935, FLAG_REGULAR, "va"}, /* व */ /* 33 */ - - /* Codas */ - {0x0902, FLAG_REGULAR, "m"}, /* ं (anusvara) */ - {0x0903, FLAG_REGULAR, ""}, /* ः (visarga) */ - {0x093d, FLAG_REGULAR, "'"}, /* ऽ (avagrada) */ - - /* Special characters */ - {0x0950, FLAG_REGULAR, "óm"}, /* ॐ */ - - /* Numbers */ - {0x0966, FLAG_REGULAR, "0"}, - {0x0967, FLAG_REGULAR, "1"}, - {0x0968, FLAG_REGULAR, "2"}, - {0x0969, FLAG_REGULAR, "3"}, - {0x096a, FLAG_REGULAR, "4"}, - {0x096b, FLAG_REGULAR, "5"}, - {0x096c, FLAG_REGULAR, "6"}, - {0x096d, FLAG_REGULAR, "7"}, - {0x096e, FLAG_REGULAR, "8"}, - {0x096f, FLAG_REGULAR, "9"}, - - /* Diacritic modifiers */ - {0x093e, FLAG_MODIFIER, "á"}, /* ा */ - {0x093f, FLAG_MODIFIER, "i"}, /* ि */ - {0x0940, FLAG_MODIFIER, "í"}, /* ी */ - {0x0941, FLAG_MODIFIER, "u"}, /* ु */ - {0x0942, FLAG_MODIFIER, "ú"}, /* ू */ - {0x0943, FLAG_MODIFIER, "r"}, /* ृ */ - {0x0944, FLAG_MODIFIER, "r"}, /* ॄ */ - {0x0962, FLAG_MODIFIER, "l"}, /* ॢ */ - {0x0963, FLAG_MODIFIER, "l"}, /* ॣ */ - {0x0947, FLAG_MODIFIER, "é"}, /* े */ - {0x0948, FLAG_MODIFIER, "ai"}, /* ै */ - {0x094b, FLAG_MODIFIER, "ó"}, 
/* ो */ - {0x094c, FLAG_MODIFIER, "au"}, /* ौ */ - {0x094d, FLAG_MODIFIER, ""}, /* ् (virama) */ - - {0, 0, NULL} -}; - -static void nasal_consonants_filter(struct syllable *chain) -{ - struct syllable *syllable = chain; - - while (syllable) { - if (is_devanagari(syllable->code) && strcmp(syllable->data, "n") == 0) { - if (syllable->next != NULL) { - if (syllable->next->data[0] == 'p' || - syllable->next->data[0] == 'b' || - syllable->next->data[0] == 'm') { - free(syllable->data); - syllable->data = strdup("m"); - } - } - } - - if (is_devanagari(syllable->code) && strcmp(syllable->data, "m") == 0) { - if (syllable->next != NULL) { - if (syllable->next->data[0] != 'p' && - syllable->next->data[0] != 'b' && - syllable->next->data[0] != 'm') { - free(syllable->data); - syllable->data = strdup("n"); - } - } - } - - - syllable = syllable->next; - } -} - -static void end_of_word_filter(struct syllable *chain) -{ - struct syllable *syllable = chain; - unsigned int n; - - while (syllable) { - if (syllable->next == NULL || isspace(syllable->next->data[0])) { - if (!strcmp(syllable->data, "m")) { - free(syllable->data); - syllable->data = strdup(""); - } - - n = strlen(syllable->data); - if (!strcmp(syllable->data + n - 2, "á")) { - syllable->data[n - 2] = 'a'; - syllable->data[n - 1] = '\0'; - } - } - - syllable = syllable->next; - } -} - -static const transliteration_filter_t filters[] = { - nasal_consonants_filter, - end_of_word_filter, - NULL -}; - -static const struct transliteration_context context = { - .table = table, - .filters = filters -}; - -const struct transliteration_context *get_iast_czech_transliteration_context() -{ - return &context; -} diff --git a/iast-czech.h b/iast-czech.h deleted file mode 100644 index fc3bad2..0000000 --- a/iast-czech.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __IAST_CZECH_H -#define __IAST_CZECH_H - -#include "transliteration.h" - -const struct transliteration_context 
*get_iast_czech_transliteration_context(); - -#endif /* __IAST_CZECH_H */ diff --git a/iast.c b/iast.c index e0b72f2..b71ab52 100644 --- a/iast.c +++ b/iast.c @@ -4,7 +4,7 @@ #include "compat.h" #include "iast.h" -static const struct translit_letter table[] = { +static struct translit_letter table[] = { /* Vowels */ {0x0910, VOWEL, "ai"}, /* 01 */ @@ -96,16 +96,7 @@ static const struct translit_letter table[] = { {0, 0, NULL} }; -static const transliteration_filter_t filters[] = { - NULL -}; - -static struct translit_context context = { - .table = table, - .filters = filters -}; - -struct translit_context *get_iast_transliteration_context() +struct translit_letter *get_iast_transliteration_table() { - return &context; + return table; } diff --git a/iast.h b/iast.h index 3a7111b..34e6e4e 100644 --- a/iast.h +++ b/iast.h @@ -5,6 +5,6 @@ #include "transliteration.h" -struct translit_context *get_iast_transliteration_context(); +struct translit_letter *get_iast_transliteration_table(); #endif /* __IAST_H */ diff --git a/main.c b/main.c index 25e3f5c..dc9adbf 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,6 @@ #include "transliteration.h" #include "iast.h" -#include "iast-czech.h" #include "encoder.h" #define PROGNAME "iast" @@ -88,7 +87,6 @@ int main(int argc, const char **argv) const char *arg; const char *queue[argc]; char *input, *output; - const struct transliteration_context *context; for (i = 1; i < argc; i++) { arg = argv[i]; @@ -119,10 +117,6 @@ int main(int argc, const char **argv) } } - context = (flags & FLAG_CZECH) - ? 
get_iast_czech_transliteration_context() - : get_iast_transliteration_context(); - if (flags & FLAG_STDIN) { input = stdin_read(); if (input == NULL) { @@ -133,7 +127,7 @@ int main(int argc, const char **argv) if (flags & FLAG_ENCODE) { output = encode_iast_punctation(input); } else { - output = transliterate_devanagari_to_latin(input, context); + output = transliterate_devanagari_to_latin(input); } fprintf(stdout, "%s\n", output); @@ -145,7 +139,7 @@ int main(int argc, const char **argv) if (flags & FLAG_ENCODE) { output = encode_iast_punctation(queue[i]); } else { - output = transliterate_devanagari_to_latin(queue[i], context); + output = transliterate_devanagari_to_latin(queue[i]); } fprintf(stdout, "%s\n", output); diff --git a/syllable.c b/syllable.c deleted file mode 100644 index 0b7f37e..0000000 --- a/syllable.c +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#include -#include -#include "syllable.h" -#include "utf8.h" - - -struct syllable *syllable_alloc(const char *data, unsigned int code) -{ - struct syllable *ptr = malloc(sizeof(*ptr)); - - if (ptr == NULL) - goto out; - - ptr->data = data != NULL ? 
strdup(data) : utf8_code_to_string(code); - ptr->code = code; - ptr->prev = NULL; - ptr->next = NULL; - -out: - return ptr; -} - -void syllable_drop(struct syllable *ptr) -{ - if (ptr == NULL) - return; - - free(ptr->data); - free(ptr); -} - -unsigned int syllable_chain_length(struct syllable *head) -{ - struct syllable *walk = head; - unsigned int length = 0; - - while (walk) { - length += strlen(walk->data); - walk = walk->next; - } - - return length; -} - -char *syllable_chain_to_string(struct syllable *head) -{ - struct syllable *walk = head; - unsigned int length = syllable_chain_length(head); - char *buffer = malloc(length + 1); - char *ptr = buffer; - - while (walk) { - strcpy(ptr, walk->data); - ptr += strlen(walk->data); - walk = walk->next; - } - - return buffer; -} - -void syllable_chain_drop(struct syllable *head) -{ - struct syllable *walk = head, *next; - - while (walk) { - next = walk->next; - syllable_drop(walk); - walk = next; - } -} diff --git a/syllable.h b/syllable.h deleted file mode 100644 index f2b835d..0000000 --- a/syllable.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __SYLLABE_H -#define __SYLLABE_H - -struct syllable { - char *data; - unsigned int code; - struct syllable *prev; - struct syllable *next; -}; - -struct syllable *syllable_alloc(const char *data, unsigned int code); -void syllable_drop(struct syllable *syllable); - -unsigned int syllable_chain_length(struct syllable *head); -char *syllable_chain_to_string(struct syllable *head); -void syllable_chain_drop(struct syllable *head); - -#endif /* __SYLLABE_H */ diff --git a/transliteration.c b/transliteration.c index 10e55fa..c9d5dd2 100644 --- a/transliteration.c +++ b/transliteration.c @@ -1,84 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include -#include - +#include "compat.h" #include "transliteration.h" -#include "syllable.h" +#include "iast.h" #include "utf8.h" -static const struct transliteration_letter 
*find_letter_by_code(unsigned int c, - const struct transliteration_letter *table) +char *transliterate_devanagari_to_latin(const char *devanagari) { - const struct transliteration_letter *walk = table; - - if (c == 0) - return NULL; - - while (walk->code != 0) { - if (c == walk->code) - return walk; - walk++; - } - - return NULL; -} - -static void syllable_modify(struct syllable *syllable, const char *data) -{ - char buffer[10]; - - if (syllable == NULL) - return; - - strcpy(buffer, syllable->data); - buffer[strlen(buffer) - 1] = 0; - strcat(buffer, data); - - free(syllable->data); - syllable->data = strdup(buffer); -} - -static void apply_transliteration_filters(struct syllable *head, - const transliteration_filter_t *filters) -{ - const transliteration_filter_t *filter = filters; - - while (*filter) { - (*filter)(head); - filter++; - } -} - -char *transliterate_devanagari_to_latin(const char *devanagari, - const struct transliteration_context *context) -{ - struct syllable *head = NULL, *tail = NULL, **indirect = &head; - const struct transliteration_letter *letter; - const char *ptr = devanagari; - const char *end = ptr + strlen(devanagari); - unsigned int code; + struct translit_letter *table; char *retval; - while (ptr < end) { - code = utf8_unpack_char(ptr); - ptr += utf8_char_length(code); + table = get_iast_transliteration_table(); - letter = find_letter_by_code(code, context->table); - if (letter && letter->flags & FLAG_MODIFIER) { - syllable_modify(tail, letter->data); - continue; - } - - *indirect = syllable_alloc(letter ? 
letter->data : NULL, code); - tail = *indirect; - indirect = &(*indirect)->next; - } - - apply_transliteration_filters(head, context->filters); - - retval = syllable_chain_to_string(head); - syllable_chain_drop(head); + retval = strdup(""); /* FIXME */ return retval; } diff --git a/transliteration.h b/transliteration.h index 5b63486..9e52b2f 100644 --- a/transliteration.h +++ b/transliteration.h @@ -3,8 +3,6 @@ #ifndef __TRANSLITERATION_H #define __TRANSLITERATION_H -#include "syllable.h" - enum translit_letter_type { VOWEL, CONSONANT, @@ -14,8 +12,6 @@ enum translit_letter_type { VOWEL_SIGN }; -typedef void (*transliteration_filter_t)(struct syllable *syllable_chain); - struct translit_letter { unsigned int code; enum translit_letter_type type; @@ -23,12 +19,10 @@ struct translit_letter { }; struct translit_context { - const struct translit_letter *table; - const transliteration_filter_t *filters; + struct translit_letter *table; }; -char *transliterate_devanagari_to_latin(const char *text, - const struct transliteration_context *context); +char *transliterate_devanagari_to_latin(const char *text); static inline int is_devanagari(unsigned int code) {