Numerous updates:
* Add makefile to install data * Reference text is now formatted * UTF-8 string processing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,4 @@
|
||||
Makefile
|
||||
Makefile.bak
|
||||
Makefile.in
|
||||
aclocal.m4
|
||||
autom4te.cache/
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
SUBDIRS = src
|
||||
SUBDIRS = src data
|
||||
dist_doc_DATA = README
|
||||
|
||||
@@ -9,5 +9,6 @@ AC_CONFIG_HEADERS([config.h])
|
||||
AC_CONFIG_FILES([
|
||||
Makefile
|
||||
src/Makefile
|
||||
data/Makefile
|
||||
])
|
||||
AC_OUTPUT
|
||||
|
||||
2
data/Makefile.am
Normal file
2
data/Makefile.am
Normal file
@@ -0,0 +1,2 @@
|
||||
pkgdata_DATA = *.xsl sermon.dtd
|
||||
EXTRA_DIST = *.xsl sermon.dtd
|
||||
@@ -1,8 +1,8 @@
|
||||
AM_CPPFLAGS = ${libxml2_CFLAGS} ${libxslt_CFLAGS}
|
||||
AM_CPPFLAGS = ${libxml2_CFLAGS} ${libxslt_CFLAGS} -DDATADIR=\"$(pkgdatadir)\"
|
||||
bin_PROGRAMS = sermon
|
||||
BUILT_SOURCES = sermon_lexer.c sermon_parser.c sermon_parser.h
|
||||
AM_YFLAGS = -d --location
|
||||
sermon_SOURCES = sermon_lexer.l sermon_parser.y sermon_util.c main.c
|
||||
sermon_SOURCES = sermon_lexer.l sermon_parser.y sermon_util.c main.c xml.c
|
||||
sermon_LDADD = ${libxml2_LIBS} ${libxslt_LIBS}
|
||||
CLEANFILES = sermon_lexer.c sermon_parser.c sermon_parser.h
|
||||
LIBS = $(LEXLIB)
|
||||
|
||||
11
src/main.c
11
src/main.c
@@ -1,7 +1,9 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <libxml/tree.h>
|
||||
#include "sermon.h"
|
||||
#include "xml.h"
|
||||
|
||||
extern int yyparse(Sermon *);
|
||||
extern FILE* yyin;
|
||||
@@ -16,6 +18,7 @@ void usage(const char* progname) {
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
Sermon sermon;
|
||||
xmlDocPtr document;
|
||||
int i = 0, block = 0, normal = 0;
|
||||
const char* progname = argv[0], *filename = NULL;
|
||||
while (++i < argc) {
|
||||
@@ -38,6 +41,7 @@ int main(int argc, char* argv[]) {
|
||||
yyin = fopen(argv[1], "rt");
|
||||
}
|
||||
yyparse(&sermon);
|
||||
/*
|
||||
printf("Parsed sermon.\n");
|
||||
printf("TITLE=%s\n", sermon.sermonTitle ? sermon.sermonTitle : "none");
|
||||
printf("AUTHOR=%s\n", sermon.sermonAuthor ? sermon.sermonAuthor : "none");
|
||||
@@ -55,6 +59,13 @@ int main(int argc, char* argv[]) {
|
||||
printf(" - %s: %s\n", sermon.sermonReferences[i].refId, sermon.sermonReferences[i].refText);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
document = sermonToXmlDoc(&sermon);
|
||||
printXML(document);
|
||||
xmlFreeDoc(document);
|
||||
|
||||
/* clean up, clean up, everybody, everywhere! */
|
||||
FreeSermon(&sermon);
|
||||
if (strcmp(filename, "-") != 0) {
|
||||
fclose(yyin);
|
||||
|
||||
@@ -18,7 +18,7 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
char* refId;
|
||||
char* refText;
|
||||
SermonParagraph refText;
|
||||
} SermonReference;
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -126,7 +126,7 @@ references:
|
||||
| /* empty */
|
||||
;
|
||||
reference:
|
||||
'{' KW_REF ':' ID ':' REFTEXT '}' { SermonReference r = { .refId = $4, .refText = $6 }; APPEND_QUEUE(ReferenceQueue, referenceQ, r); }
|
||||
'{' KW_REF ':' ID ':' REFTEXT '}' { SermonReference r = { .refId = $4, .refText = { .paraType = PARA_DEFAULT, .paraText = $6 } }; APPEND_QUEUE(ReferenceQueue, referenceQ, r); }
|
||||
;
|
||||
%%
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ void FreeSermon(Sermon* srm) {
|
||||
if (srm->numReferences) {
|
||||
for (i = 0; i < srm->numReferences; i++) {
|
||||
free(srm->sermonReferences[i].refId);
|
||||
free(srm->sermonReferences[i].refText);
|
||||
free(srm->sermonReferences[i].refText.paraText);
|
||||
}
|
||||
free(srm->sermonReferences);
|
||||
}
|
||||
|
||||
124
src/utf8.c
Normal file
124
src/utf8.c
Normal file
@@ -0,0 +1,124 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "utf8.h"
|
||||
|
||||
utf8iterator*
|
||||
utf8NewIterator(const char* txt) {
|
||||
utf8iterator* result = malloc(sizeof(utf8iterator));
|
||||
memset(result, 0, sizeof(utf8iterator));
|
||||
result->txt = txt;
|
||||
return result;
|
||||
}
|
||||
|
||||
uint32_t utf8CharAt(const utf8iterator* iter) {
|
||||
int byteIndex = iter->byteIndex;
|
||||
if ((iter->txt[byteIndex] & 0x80) == 0) {
|
||||
return iter->txt[byteIndex];
|
||||
} else if (((iter->txt[byteIndex] & 0xe0) == 0xc0) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80)) {
|
||||
return (uint32_t)(iter->txt[byteIndex + 1] & 0x3f) |
|
||||
((uint32_t)(iter->txt[byteIndex] & 0x1f) << 6);
|
||||
} else if (((iter->txt[byteIndex] & 0xf0) == 0xe0) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80)) {
|
||||
return (uint32_t)(iter->txt[byteIndex + 2] & 0x3f) |
|
||||
((uint32_t)(iter->txt[byteIndex + 1] & 0x3f) << 6) |
|
||||
((uint32_t)(iter->txt[byteIndex] & 0xf) << 12);
|
||||
} else if (((iter->txt[byteIndex] & 0xf8) == 0xf0) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 3] & 0xc0) == 0x80)) {
|
||||
return (uint32_t)(iter->txt[byteIndex + 3] & 0x3f) |
|
||||
((uint32_t)(iter->txt[byteIndex + 2] & 0x3f) << 6) |
|
||||
((uint32_t)(iter->txt[byteIndex + 1] & 0x3f) << 12) |
|
||||
((uint32_t)(iter->txt[byteIndex] & 0x07) << 18);
|
||||
} else if (((iter->txt[byteIndex] & 0xfc) == 0xf8) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 3] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 4] & 0xc0) == 0x80)) {
|
||||
return (uint32_t)(iter->txt[byteIndex + 4] & 0x3f) |
|
||||
((uint32_t)(iter->txt[byteIndex + 3] & 0x3f) << 6) |
|
||||
((uint32_t)(iter->txt[byteIndex + 2] & 0x3f) << 12) |
|
||||
((uint32_t)(iter->txt[byteIndex + 1] & 0x3f) << 18) |
|
||||
((uint32_t)(iter->txt[byteIndex] & 0x03) << 24);
|
||||
|
||||
} else if (((iter->txt[byteIndex] & 0xf7) == 0xfc) &&
|
||||
((iter->txt[byteIndex + 1]) & 0xc0 == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 3] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 4] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 5] & 0xc0) == 0x80)) {
|
||||
return (uint32_t)(iter->txt[byteIndex + 5] & 0x3f) |
|
||||
((uint32_t)(iter->txt[byteIndex + 4] & 0x3f) << 6) |
|
||||
((uint32_t)(iter->txt[byteIndex + 3] & 0x3f) << 12) |
|
||||
((uint32_t)(iter->txt[byteIndex + 2] & 0x3f) << 18) |
|
||||
((uint32_t)(iter->txt[byteIndex + 1] & 0x3f) << 24) |
|
||||
((uint32_t)(iter->txt[byteIndex] & 0x01) << 30);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
_next_offset(const utf8iterator* iter) {
|
||||
int byteIndex = iter->byteIndex;
|
||||
if ((iter->txt[byteIndex] & 0x80) == 0) {
|
||||
return 1;
|
||||
} else if (((iter->txt[byteIndex] & 0xe0) == 0xc0) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80)) {
|
||||
return 2;
|
||||
} else if (((iter->txt[byteIndex] & 0xf0) == 0xe0) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80)) {
|
||||
return 3;
|
||||
} else if (((iter->txt[byteIndex] & 0xf8) == 0xf0) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 3] & 0xc0) == 0x80)) {
|
||||
return 4;
|
||||
} else if (((iter->txt[byteIndex] & 0xfc) == 0xf8) &&
|
||||
((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 3] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 4] & 0xc0) == 0x80)) {
|
||||
return 5;
|
||||
} else if (((iter->txt[byteIndex] & 0xf7) == 0xfc) &&
|
||||
((iter->txt[byteIndex + 1]) & 0xc0 == 0x80) &&
|
||||
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 3] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 4] & 0xc0) == 0x80) &&
|
||||
((iter->txt[byteIndex + 5] & 0xc0) == 0x80)) {
|
||||
return 6;
|
||||
} else return -1;
|
||||
}
|
||||
|
||||
int
|
||||
utf8Advance(utf8iterator* iter) {
|
||||
int next_offset = _next_offset(iter);
|
||||
if (next_offset > 0) {
|
||||
iter->byteIndex += next_offset;
|
||||
return ++iter->logicalIndex;
|
||||
} else return -1;
|
||||
}
|
||||
|
||||
int
|
||||
utf8CopyEncodedCharAt(const utf8iterator* iter, size_t bufsize, char* dest) {
|
||||
size_t sz = _next_offset(iter);
|
||||
if (sz + 1 > bufsize) {
|
||||
return 0;
|
||||
} else {
|
||||
memset(dest, 0, bufsize);
|
||||
memcpy(dest, iter->txt + iter->byteIndex, sz);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
utf8ResetIterator(utf8iterator* iter) {
|
||||
iter->logicalIndex = iter->byteIndex = 0;
|
||||
}
|
||||
|
||||
void
|
||||
utf8FreeIterator(utf8iterator* iter) {
|
||||
free(iter);
|
||||
}
|
||||
19
src/utf8.h
Normal file
19
src/utf8.h
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef _UTF8_H
#define _UTF8_H

#include <stddef.h>  /* size_t, used by utf8CopyEncodedCharAt */
#include <stdint.h>  /* uint32_t */

/*
 * Cursor over a NUL-terminated, UTF-8 encoded string.
 */
typedef struct {
  const char* txt;   /* text being iterated; stored by pointer, not copied */
  int byteIndex;     /* byte offset of the current character within txt */
  int logicalIndex;  /* number of characters advanced past so far */
} utf8iterator;

/* Allocate an iterator positioned at the start of txt. */
utf8iterator* utf8NewIterator(const char* txt);

/*
 * Step forward by one encoded character.  Returns the new logical index, or
 * -1 when the iterator could not be advanced.
 */
int utf8Advance(utf8iterator*);

/*
 * Decode the character at the current position.  Returns 0 when the bytes
 * there are not a sequence the decoder accepts.
 */
uint32_t utf8CharAt(const utf8iterator*);

/* Move the iterator back to the start of its text. */
void utf8ResetIterator(utf8iterator*);

/* Free an iterator obtained from utf8NewIterator(); the text is not freed. */
void utf8FreeIterator(utf8iterator*);

/*
 * Copy the encoded bytes of the current character into dest (bufsize bytes
 * large) as a NUL-terminated string.  Returns 1 when the bytes were copied
 * and 0 when dest is too small to hold them plus the terminator.
 */
int utf8CopyEncodedCharAt(const utf8iterator* iter, size_t bufsize, char* dest);

#endif /* !def _UTF8_H */
|
||||
99
src/xml.c
Normal file
99
src/xml.c
Normal file
@@ -0,0 +1,99 @@
|
||||
#include <stdio.h>
#include <libxml/tree.h>
#include "sermon.h"
|
||||
|
||||
static void
|
||||
appendHeaderNode(xmlNodePtr headerNode, const char* headerName,
|
||||
const char* headerText) {
|
||||
if (headerText) {
|
||||
xmlNodePtr ptr = xmlNewNode(NULL, headerName);
|
||||
xmlAddChild(ptr, xmlNewText(headerText));
|
||||
xmlAddChild(headerNode, ptr);
|
||||
}
|
||||
}
|
||||
|
||||
xmlNodePtr
|
||||
sermonHeader(const Sermon* srm) {
|
||||
xmlNodePtr header = xmlNewNode(NULL, "header");
|
||||
appendHeaderNode(header, "title", srm->sermonTitle);
|
||||
appendHeaderNode(header, "author", srm->sermonAuthor);
|
||||
appendHeaderNode(header, "occasion", srm->sermonOccasion);
|
||||
appendHeaderNode(header, "date", srm->sermonDate);
|
||||
appendHeaderNode(header, "text", srm->sermonText);
|
||||
return header;
|
||||
}
|
||||
|
||||
|
||||
static xmlNodePtr
|
||||
paragraphToXML(const SermonParagraph* p) {
|
||||
xmlNodePtr result = xmlNewNode(NULL, "p");
|
||||
xmlAddChild(result, xmlNewText(p->paraText));
|
||||
return result;
|
||||
}
|
||||
|
||||
xmlNodePtr
|
||||
sermonBody(const Sermon* srm) {
|
||||
xmlNodePtr body = xmlNewNode(NULL, "body");
|
||||
xmlNodePtr block = NULL;
|
||||
int i = 0;
|
||||
for (i = 0; i < srm->numParagraphs; i++) {
|
||||
const SermonParagraph* p = &srm->sermonParagraphs[i];
|
||||
xmlNodePtr para = paragraphToXML(p);
|
||||
if (p->paraType == PARA_BLOCKQUOTE) {
|
||||
if (!block) {
|
||||
block = xmlNewNode(NULL, "quote");
|
||||
xmlAddChild(body, block);
|
||||
}
|
||||
xmlAddChild(block, para);
|
||||
} else {
|
||||
block = NULL;
|
||||
xmlAddChild(body, para);
|
||||
}
|
||||
}
|
||||
return body;
|
||||
}
|
||||
|
||||
xmlNodePtr
|
||||
sermonFooter(const Sermon* srm) {
|
||||
xmlNodePtr footer = xmlNewNode(NULL, "footer");
|
||||
int i = 0;
|
||||
char num[10];
|
||||
for (i = 0; i < srm->numReferences; i++) {
|
||||
const SermonReference* r = &srm->sermonReferences[i];
|
||||
xmlNodePtr ref = xmlNewNode(NULL, "ref");
|
||||
snprintf(num, 10, "%d", i + 1);
|
||||
xmlNewProp(ref, "number", num);
|
||||
xmlAddChild(ref, paragraphToXML(&r->refText));
|
||||
xmlAddChild(footer, ref);
|
||||
}
|
||||
return footer;
|
||||
}
|
||||
|
||||
xmlDocPtr
|
||||
sermonToXmlDoc(const Sermon* srm) {
|
||||
/* document creation and setup */
|
||||
xmlDocPtr document = xmlNewDoc("1.0");
|
||||
xmlDtdPtr dtd = xmlCreateIntSubset(document, "sermon", NULL, DATADIR "/sermon.dtd");
|
||||
xmlNodePtr sermon = xmlNewNode(NULL, "sermon");
|
||||
xmlNsPtr sermon_ns = xmlNewNs(sermon, "urn:david-sermon", NULL);
|
||||
xmlDocSetRootElement(document, sermon);
|
||||
|
||||
/* add header */
|
||||
xmlAddChild(sermon, sermonHeader(srm));
|
||||
|
||||
/* add body paragraphs */
|
||||
xmlAddChild(sermon, sermonBody(srm));
|
||||
|
||||
if (srm->numReferences) {
|
||||
/* add footer */
|
||||
xmlAddChild(sermon, sermonFooter(srm));
|
||||
}
|
||||
|
||||
return document;
|
||||
}
|
||||
|
||||
void
|
||||
printXML(xmlDocPtr document) {
|
||||
xmlCharEncodingHandlerPtr encoding = xmlFindCharEncodingHandler("utf-8");
|
||||
xmlOutputBufferPtr output = xmlOutputBufferCreateFd(1, encoding);
|
||||
xmlSaveFileTo(output, document, "utf-8");
|
||||
}
|
||||
Reference in New Issue
Block a user