reimplement devanagari to latin transliteration
This commit is contained in:
parent
94388aea07
commit
f4d545f4d1
6 changed files with 67 additions and 5 deletions
2
Makefile
2
Makefile
|
@ -12,7 +12,7 @@ TEST_LIBS = $(LIBS) $(shell pkg-config --libs check)
|
||||||
|
|
||||||
all: iast doc/iast.1.gz
|
all: iast doc/iast.1.gz
|
||||||
|
|
||||||
iast: main.o $(OBJFILES)
|
iast: main.o $(OBJECTS)
|
||||||
$(CC) $^ -o $@ $(CFLAGS)
|
$(CC) $^ -o $@ $(CFLAGS)
|
||||||
|
|
||||||
test: tests/test
|
test: tests/test
|
||||||
|
|
1
main.c
1
main.c
|
@ -6,7 +6,6 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "transliteration.h"
|
#include "transliteration.h"
|
||||||
#include "iast.h"
|
|
||||||
#include "encoder.h"
|
#include "encoder.h"
|
||||||
|
|
||||||
#define PROGNAME "iast"
|
#define PROGNAME "iast"
|
||||||
|
|
|
@ -24,8 +24,14 @@ int main(void)
|
||||||
suite = create_test_suite();
|
suite = create_test_suite();
|
||||||
runner = srunner_create(suite);
|
runner = srunner_create(suite);
|
||||||
|
|
||||||
|
puts("-----------------------------------------");
|
||||||
srunner_run_all(runner, CK_NORMAL);
|
srunner_run_all(runner, CK_NORMAL);
|
||||||
retval = srunner_ntests_failed(runner);
|
retval = srunner_ntests_failed(runner);
|
||||||
|
puts("-----------------------------------------");
|
||||||
|
|
||||||
|
puts(retval == 0 ? "\033[32mpassed\033[0m\n"
|
||||||
|
: "\033[31mfailed\033[0m\n");
|
||||||
|
|
||||||
srunner_free(runner);
|
srunner_free(runner);
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#ifndef __TEST_TEST_H
|
#ifndef __TEST_TEST_H
|
||||||
#define __TEST_TEST_H
|
#define __TEST_TEST_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <check.h>
|
#include <check.h>
|
||||||
|
|
||||||
|
|
|
@ -5,14 +5,68 @@
|
||||||
#include "iast.h"
|
#include "iast.h"
|
||||||
#include "utf8.h"
|
#include "utf8.h"
|
||||||
|
|
||||||
|
#define SCHWA_CHARACTER 'a'
|
||||||
|
#define CHUNKSIZE 1024
|
||||||
|
|
||||||
|
static struct translit_letter *letter_by_code(struct translit_letter *table,
|
||||||
|
unsigned int c)
|
||||||
|
{
|
||||||
|
while (table->code != 0) {
|
||||||
|
if (table->code == c)
|
||||||
|
return table;
|
||||||
|
table++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
char *transliterate_devanagari_to_latin(const char *devanagari)
|
char *transliterate_devanagari_to_latin(const char *devanagari)
|
||||||
{
|
{
|
||||||
struct translit_letter *table;
|
struct translit_letter *table;
|
||||||
char *retval;
|
struct translit_letter *letter;
|
||||||
|
unsigned int c, alloc = 0, done = 0, len;
|
||||||
|
const char *src = devanagari;
|
||||||
|
char *latin = NULL;
|
||||||
|
|
||||||
table = get_iast_transliteration_table();
|
table = get_iast_transliteration_table();
|
||||||
|
|
||||||
retval = strdup(""); /* FIXME */
|
while (1) {
|
||||||
|
if (alloc < done + UNICODE_MAX_LENGTH) {
|
||||||
|
latin = realloc(latin, alloc + CHUNKSIZE);
|
||||||
|
alloc += CHUNKSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
return retval;
|
c = utf8_unpack_char(src);
|
||||||
|
len = utf8_char_length(c);
|
||||||
|
src += len;
|
||||||
|
|
||||||
|
letter = letter_by_code(table, c);
|
||||||
|
if (letter) {
|
||||||
|
switch (letter->type) {
|
||||||
|
case CONSONANT:
|
||||||
|
strcpy(latin + done, letter->data);
|
||||||
|
done += strlen(letter->data);
|
||||||
|
*(latin + done++) = SCHWA_CHARACTER;
|
||||||
|
break;
|
||||||
|
case VOWEL_SIGN:
|
||||||
|
if (done)
|
||||||
|
done--;
|
||||||
|
strcpy(latin + done, letter->data);
|
||||||
|
done += strlen(letter->data);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
strcpy(latin + done, letter->data);
|
||||||
|
done += strlen(letter->data);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
utf8_pack_char(latin + done, c);
|
||||||
|
done += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return latin;
|
||||||
}
|
}
|
||||||
|
|
2
utf8.h
2
utf8.h
|
@ -3,6 +3,8 @@
|
||||||
#ifndef __UTF8_H
|
#ifndef __UTF8_H
|
||||||
#define __UTF8_H
|
#define __UTF8_H
|
||||||
|
|
||||||
|
#define UNICODE_MAX_LENGTH 4
|
||||||
|
|
||||||
unsigned int utf8_unpack_char(const char *src);
|
unsigned int utf8_unpack_char(const char *src);
|
||||||
void utf8_pack_char(char *dest, unsigned int c);
|
void utf8_pack_char(char *dest, unsigned int c);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue