Reimplement Devanagari to Latin transliteration

This commit is contained in:
Vlasta Vesely 2020-01-01 17:52:21 +01:00
parent 94388aea07
commit f4d545f4d1
6 changed files with 67 additions and 5 deletions

View file

@ -12,7 +12,7 @@ TEST_LIBS = $(LIBS) $(shell pkg-config --libs check)
all: iast doc/iast.1.gz all: iast doc/iast.1.gz
iast: main.o $(OBJFILES) iast: main.o $(OBJECTS)
$(CC) $^ -o $@ $(CFLAGS) $(CC) $^ -o $@ $(CFLAGS)
test: tests/test test: tests/test

1
main.c
View file

@ -6,7 +6,6 @@
#include <errno.h> #include <errno.h>
#include "transliteration.h" #include "transliteration.h"
#include "iast.h"
#include "encoder.h" #include "encoder.h"
#define PROGNAME "iast" #define PROGNAME "iast"

View file

@ -24,8 +24,14 @@ int main(void)
suite = create_test_suite(); suite = create_test_suite();
runner = srunner_create(suite); runner = srunner_create(suite);
puts("-----------------------------------------");
srunner_run_all(runner, CK_NORMAL); srunner_run_all(runner, CK_NORMAL);
retval = srunner_ntests_failed(runner); retval = srunner_ntests_failed(runner);
puts("-----------------------------------------");
puts(retval == 0 ? "\033[32mpassed\033[0m\n"
: "\033[31mfailed\033[0m\n");
srunner_free(runner); srunner_free(runner);
return retval; return retval;

View file

@ -1,6 +1,7 @@
#ifndef __TEST_TEST_H #ifndef __TEST_TEST_H
#define __TEST_TEST_H #define __TEST_TEST_H
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <check.h> #include <check.h>

View file

@ -5,14 +5,68 @@
#include "iast.h" #include "iast.h"
#include "utf8.h" #include "utf8.h"
#define SCHWA_CHARACTER 'a'
#define CHUNKSIZE 1024
/*
 * Look up the transliteration entry for code point `c`.
 *
 * Scans `table` linearly from its first entry up to, but not including,
 * the entry whose `code` is 0, which serves as the end-of-table sentinel.
 *
 * Returns a pointer to the first entry whose `code` equals `c`, or NULL
 * when no entry before the sentinel matches. Since the sentinel itself is
 * never compared, a lookup of c == 0 always yields NULL.
 */
static struct translit_letter *letter_by_code(struct translit_letter *table,
					      unsigned int c)
{
	struct translit_letter *entry;

	for (entry = table; entry->code != 0; entry++) {
		if (entry->code == c)
			return entry;
	}

	return NULL;
}
char *transliterate_devanagari_to_latin(const char *devanagari) char *transliterate_devanagari_to_latin(const char *devanagari)
{ {
struct translit_letter *table; struct translit_letter *table;
char *retval; struct translit_letter *letter;
unsigned int c, alloc = 0, done = 0, len;
const char *src = devanagari;
char *latin = NULL;
table = get_iast_transliteration_table(); table = get_iast_transliteration_table();
retval = strdup(""); /* FIXME */ while (1) {
if (alloc < done + UNICODE_MAX_LENGTH) {
return retval; latin = realloc(latin, alloc + CHUNKSIZE);
alloc += CHUNKSIZE;
}
c = utf8_unpack_char(src);
len = utf8_char_length(c);
src += len;
letter = letter_by_code(table, c);
if (letter) {
switch (letter->type) {
case CONSONANT:
strcpy(latin + done, letter->data);
done += strlen(letter->data);
*(latin + done++) = SCHWA_CHARACTER;
break;
case VOWEL_SIGN:
if (done)
done--;
strcpy(latin + done, letter->data);
done += strlen(letter->data);
break;
default:
strcpy(latin + done, letter->data);
done += strlen(letter->data);
break;
}
} else {
utf8_pack_char(latin + done, c);
done += len;
}
if (c == 0)
break;
}
return latin;
} }

2
utf8.h
View file

@ -3,6 +3,8 @@
#ifndef __UTF8_H #ifndef __UTF8_H
#define __UTF8_H #define __UTF8_H
#define UNICODE_MAX_LENGTH 4
unsigned int utf8_unpack_char(const char *src); unsigned int utf8_unpack_char(const char *src);
void utf8_pack_char(char *dest, unsigned int c); void utf8_pack_char(char *dest, unsigned int c);