add punctuation encoder

This commit is contained in:
Vlasta Vesely 2018-05-18 10:52:11 +02:00
parent 8c4061e171
commit 93114de4a7
3 changed files with 81 additions and 1 deletions

View file

@ -1,6 +1,6 @@
.PHONY: main test install uninstall clean .PHONY: main test install uninstall clean
OBJS = syllable.o utf8.o transliteration.o iast.o iast-czech.o OBJS = syllable.o utf8.o transliteration.o iast.o iast-czech.o encoder.o
iast: main.o $(OBJS) iast: main.o $(OBJS)

72
encoder.c Normal file
View file

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "encoder.h"
#include "utf8.h"
/* One substitution rule: the ASCII shorthand `from` is replaced by `to`. */
struct encoder_tuple {
	const char *from;	/* ASCII key, matched as a prefix of the input */
	const char *to;		/* UTF-8 replacement text */
};

/*
 * ASCII shorthand -> IAST letter, terminated by a {NULL, NULL} sentinel.
 *
 * Replacements are spelled as explicit UTF-8 byte escapes so the emitted
 * bytes do not depend on how this source file is stored or transcoded.
 *
 * A key that extends another key ("r.-" vs "r.") is listed before the
 * shorter one, so that a first-match prefix scan can still reach it.
 */
static const struct encoder_tuple table[] = {
	{"a-",  "\xc4\x81"},		/* U+0101 a with macron */
	{"i-",  "\xc4\xab"},		/* U+012B i with macron */
	{"u-",  "\xc5\xab"},		/* U+016B u with macron */
	{"r.-", "\xe1\xb9\x9d"},	/* U+1E5D r with dot below and macron */
	{"r.",  "\xe1\xb9\x9b"},	/* U+1E5B r with dot below */
	{"l.-", "\xe1\xb8\xb9"},	/* U+1E39 l with dot below and macron */
	{"n^.", "\xe1\xb9\x85"},	/* U+1E45 n with dot above */
	{"n~",  "\xc3\xb1"},		/* U+00F1 n with tilde */
	{"s,",  "\xc5\x9b"},		/* U+015B s with acute */
	{"t.",  "\xe1\xb9\xad"},	/* U+1E6D t with dot below */
	{"d.",  "\xe1\xb8\x8d"},	/* U+1E0D d with dot below */
	{"n.",  "\xe1\xb9\x87"},	/* U+1E47 n with dot below */
	{"s.",  "\xe1\xb9\xa3"},	/* U+1E63 s with dot below */
	{"m.",  "\xe1\xb9\x83"},	/* U+1E43 m with dot below */
	{"h.",  "\xe1\xb8\xa5"},	/* U+1E25 h with dot below */
	{NULL, NULL}
};
/*
 * Look up the substitution rule that applies at the start of `text`.
 *
 * Every table key is compared against the beginning of `text`; when more
 * than one key matches, the longest key wins, so the result does not
 * depend on the order of the table entries. Empty keys never match
 * (a zero-length match would not let a caller advance through the input).
 *
 * Returns a pointer into the static table, or NULL when `text` is NULL
 * or no key is a prefix of it.
 */
const struct encoder_tuple *find_tuple(const char *text)
{
	const struct encoder_tuple *walk;
	const struct encoder_tuple *best = NULL;
	size_t best_len = 0;

	if (text == NULL) {
		return NULL;
	}

	for (walk = table; walk->from != NULL; walk++) {
		size_t len = strlen(walk->from);

		/* strncmp stops at the NUL in text, so short input is safe. */
		if (len > best_len && strncmp(text, walk->from, len) == 0) {
			best = walk;
			best_len = len;
		}
	}
	return best;
}
/*
 * Replace ASCII punctuation shorthand in `text` with its table replacement.
 *
 * The input is scanned left to right. Wherever find_tuple() reports a
 * rule, the rule's key is consumed and its replacement is emitted; any
 * other byte is copied through unchanged.
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL when `text` is NULL or the allocation fails. An empty
 * input yields an empty string.
 */
char *encode_iast_punctation(const char *text)
{
	const struct encoder_tuple *tuple;
	const char *str;
	char *buf, *dest;
	size_t size = 1;	/* the terminating NUL */

	if (text == NULL) {
		return NULL;
	}

	/*
	 * First pass: measure the exact output size, so the allocation
	 * never relies on an assumed expansion ratio between keys and
	 * replacements.
	 */
	for (str = text; *str != '\0'; ) {
		tuple = find_tuple(str);
		if (tuple) {
			size += strlen(tuple->to);
			str += strlen(tuple->from);
		} else {
			size++;
			str++;
		}
	}

	buf = malloc(size);
	if (buf == NULL) {
		return NULL;
	}

	/* Second pass: same walk, this time writing the output. */
	dest = buf;
	for (str = text; *str != '\0'; ) {
		tuple = find_tuple(str);
		if (tuple) {
			size_t len = strlen(tuple->to);

			memcpy(dest, tuple->to, len);
			dest += len;
			str += strlen(tuple->from);
		} else {
			*dest++ = *str++;
		}
	}
	*dest = '\0';

	return buf;
}

8
encoder.h Normal file
View file

@ -0,0 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ENCODER_H
#define ENCODER_H

/*
 * Convert ASCII punctuation shorthand in `text` (a NUL-terminated string)
 * into the corresponding accented letters.
 *
 * The result is a separately allocated string; the caller owns it and
 * releases it with free().
 */
char *encode_iast_punctation(const char *text);

#endif /* ENCODER_H */