reimplement devanagari to latin transliteration
This commit is contained in:
parent
94388aea07
commit
f4d545f4d1
6 changed files with 67 additions and 5 deletions
2
Makefile
2
Makefile
|
@ -12,7 +12,7 @@ TEST_LIBS = $(LIBS) $(shell pkg-config --libs check)
|
|||
|
||||
all: iast doc/iast.1.gz
|
||||
|
||||
iast: main.o $(OBJFILES)
|
||||
iast: main.o $(OBJECTS)
|
||||
$(CC) $^ -o $@ $(CFLAGS)
|
||||
|
||||
test: tests/test
|
||||
|
|
1
main.c
1
main.c
|
@ -6,7 +6,6 @@
|
|||
#include <errno.h>
|
||||
|
||||
#include "transliteration.h"
|
||||
#include "iast.h"
|
||||
#include "encoder.h"
|
||||
|
||||
#define PROGNAME "iast"
|
||||
|
|
|
@ -24,8 +24,14 @@ int main(void)
|
|||
suite = create_test_suite();
|
||||
runner = srunner_create(suite);
|
||||
|
||||
puts("-----------------------------------------");
|
||||
srunner_run_all(runner, CK_NORMAL);
|
||||
retval = srunner_ntests_failed(runner);
|
||||
puts("-----------------------------------------");
|
||||
|
||||
puts(retval == 0 ? "\033[32mpassed\033[0m\n"
|
||||
: "\033[31mfailed\033[0m\n");
|
||||
|
||||
srunner_free(runner);
|
||||
|
||||
return retval;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef __TEST_TEST_H
|
||||
#define __TEST_TEST_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <check.h>
|
||||
|
||||
|
|
|
@ -5,14 +5,68 @@
|
|||
#include "iast.h"
|
||||
#include "utf8.h"
|
||||
|
||||
#define SCHWA_CHARACTER 'a'
|
||||
#define CHUNKSIZE 1024
|
||||
|
||||
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
||||
unsigned int c)
|
||||
{
|
||||
while (table->code != 0) {
|
||||
if (table->code == c)
|
||||
return table;
|
||||
table++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *transliterate_devanagari_to_latin(const char *devanagari)
|
||||
{
|
||||
struct translit_letter *table;
|
||||
char *retval;
|
||||
struct translit_letter *letter;
|
||||
unsigned int c, alloc = 0, done = 0, len;
|
||||
const char *src = devanagari;
|
||||
char *latin = NULL;
|
||||
|
||||
table = get_iast_transliteration_table();
|
||||
|
||||
retval = strdup(""); /* FIXME */
|
||||
while (1) {
|
||||
if (alloc < done + UNICODE_MAX_LENGTH) {
|
||||
latin = realloc(latin, alloc + CHUNKSIZE);
|
||||
alloc += CHUNKSIZE;
|
||||
}
|
||||
|
||||
return retval;
|
||||
c = utf8_unpack_char(src);
|
||||
len = utf8_char_length(c);
|
||||
src += len;
|
||||
|
||||
letter = letter_by_code(table, c);
|
||||
if (letter) {
|
||||
switch (letter->type) {
|
||||
case CONSONANT:
|
||||
strcpy(latin + done, letter->data);
|
||||
done += strlen(letter->data);
|
||||
*(latin + done++) = SCHWA_CHARACTER;
|
||||
break;
|
||||
case VOWEL_SIGN:
|
||||
if (done)
|
||||
done--;
|
||||
strcpy(latin + done, letter->data);
|
||||
done += strlen(letter->data);
|
||||
break;
|
||||
default:
|
||||
strcpy(latin + done, letter->data);
|
||||
done += strlen(letter->data);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
utf8_pack_char(latin + done, c);
|
||||
done += len;
|
||||
}
|
||||
|
||||
if (c == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return latin;
|
||||
}
|
||||
|
|
2
utf8.h
2
utf8.h
|
@ -3,6 +3,8 @@
|
|||
#ifndef __UTF8_H
|
||||
#define __UTF8_H
|
||||
|
||||
#define UNICODE_MAX_LENGTH 4
|
||||
|
||||
unsigned int utf8_unpack_char(const char *src);
|
||||
void utf8_pack_char(char *dest, unsigned int c);
|
||||
|
||||
|
|
Loading…
Reference in a new issue