Numerous updates:

* Add makefile to install data

* Reference text is now formatted

* UTF-8 string processing
This commit is contained in:
David Baer
2015-08-06 14:31:38 -04:00
parent af38b1eafc
commit cfc0ba7e9a
13 changed files with 269 additions and 7 deletions

1
.gitignore vendored
View File

@@ -1,5 +1,4 @@
Makefile
Makefile.bak
Makefile.in
aclocal.m4
autom4te.cache/

View File

@@ -1,2 +1,2 @@
SUBDIRS = src
SUBDIRS = src data
dist_doc_DATA = README

View File

@@ -9,5 +9,6 @@ AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([
Makefile
src/Makefile
data/Makefile
])
AC_OUTPUT

2
data/Makefile.am Normal file
View File

@@ -0,0 +1,2 @@
# Data files (XSL stylesheets and the sermon DTD) that belong to the package
# data directory. src/Makefile.am hands $(pkgdatadir) to the C code as
# DATADIR, and xml.c uses DATADIR "/sermon.dtd" as the DTD location.
# NOTE(review): Automake does not expand wildcards in its variables; the
# "*.xsl" glob is left for make/the shell to expand -- confirm this works for
# out-of-tree (VPATH) builds and "make distcheck", or list the files by name.
pkgdata_DATA = *.xsl sermon.dtd
# The same files are listed again so they are shipped in the "make dist" tarball.
EXTRA_DIST = *.xsl sermon.dtd

View File

@@ -1,8 +1,8 @@
AM_CPPFLAGS = ${libxml2_CFLAGS} ${libxslt_CFLAGS}
AM_CPPFLAGS = ${libxml2_CFLAGS} ${libxslt_CFLAGS} -DDATADIR=\"$(pkgdatadir)\"
bin_PROGRAMS = sermon
BUILT_SOURCES = sermon_lexer.c sermon_parser.c sermon_parser.h
AM_YFLAGS = -d --location
sermon_SOURCES = sermon_lexer.l sermon_parser.y sermon_util.c main.c
sermon_SOURCES = sermon_lexer.l sermon_parser.y sermon_util.c main.c xml.c
sermon_LDADD = ${libxml2_LIBS} ${libxslt_LIBS}
CLEANFILES = sermon_lexer.c sermon_parser.c sermon_parser.h
LIBS = $(LEXLIB)

View File

@@ -1,7 +1,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libxml/tree.h>
#include "sermon.h"
#include "xml.h"
extern int yyparse(Sermon *);
extern FILE* yyin;
@@ -16,6 +18,7 @@ void usage(const char* progname) {
int main(int argc, char* argv[]) {
Sermon sermon;
xmlDocPtr document;
int i = 0, block = 0, normal = 0;
const char* progname = argv[0], *filename = NULL;
while (++i < argc) {
@@ -38,6 +41,7 @@ int main(int argc, char* argv[]) {
yyin = fopen(argv[1], "rt");
}
yyparse(&sermon);
/*
printf("Parsed sermon.\n");
printf("TITLE=%s\n", sermon.sermonTitle ? sermon.sermonTitle : "none");
printf("AUTHOR=%s\n", sermon.sermonAuthor ? sermon.sermonAuthor : "none");
@@ -55,6 +59,13 @@ int main(int argc, char* argv[]) {
printf(" - %s: %s\n", sermon.sermonReferences[i].refId, sermon.sermonReferences[i].refText);
}
printf("\n");
*/
document = sermonToXmlDoc(&sermon);
printXML(document);
xmlFreeDoc(document);
/* clean up, clean up, everybody, everywhere! */
FreeSermon(&sermon);
if (strcmp(filename, "-") != 0) {
fclose(yyin);

View File

@@ -18,7 +18,7 @@ typedef struct {
typedef struct {
char* refId;
char* refText;
SermonParagraph refText;
} SermonReference;
typedef struct {

View File

@@ -126,7 +126,7 @@ references:
| /* empty */
;
reference:
'{' KW_REF ':' ID ':' REFTEXT '}' { SermonReference r = { .refId = $4, .refText = $6 }; APPEND_QUEUE(ReferenceQueue, referenceQ, r); }
'{' KW_REF ':' ID ':' REFTEXT '}' { SermonReference r = { .refId = $4, .refText = { .paraType = PARA_DEFAULT, .paraText = $6 } }; APPEND_QUEUE(ReferenceQueue, referenceQ, r); }
;
%%

View File

@@ -23,7 +23,7 @@ void FreeSermon(Sermon* srm) {
if (srm->numReferences) {
for (i = 0; i < srm->numReferences; i++) {
free(srm->sermonReferences[i].refId);
free(srm->sermonReferences[i].refText);
free(srm->sermonReferences[i].refText.paraText);
}
free(srm->sermonReferences);
}

124
src/utf8.c Normal file
View File

@@ -0,0 +1,124 @@
#include <stdlib.h>
#include <string.h>
#include "utf8.h"
/*
 * Allocate an iterator positioned at the start of txt.
 *
 * Only the pointer is stored (the string is not copied), so txt must stay
 * valid for as long as the iterator is in use.
 *
 * Returns the new iterator with byteIndex and logicalIndex set to 0, or
 * NULL if the allocation fails. Release it with utf8FreeIterator().
 */
utf8iterator*
utf8NewIterator(const char* txt) {
    /* calloc zero-fills the struct, so both indices start at 0 */
    utf8iterator* result = calloc(1, sizeof *result);

    if (result == NULL) {
        return NULL;
    }
    result->txt = txt;
    return result;
}
/*
 * Decode the character that starts at the iterator's current byte position.
 *
 * The lead byte selects the sequence length (1 to 6 bytes); every following
 * byte must be a continuation byte of the form 10xxxxxx. The 5- and 6-byte
 * forms of the original UTF-8 definition are accepted as well as the 1- to
 * 4-byte forms. Overlong encodings and surrogate values are not rejected.
 *
 * Returns the decoded code point, or 0 when the bytes at the current
 * position do not form a recognized sequence. 0 is also returned for the
 * terminating NUL, so the two cases cannot be told apart by the caller.
 */
uint32_t utf8CharAt(const utf8iterator* iter) {
    /* work on unsigned bytes so the bit tests do not depend on char's sign */
    const unsigned char* p =
        (const unsigned char*)iter->txt + iter->byteIndex;
    const unsigned lead = p[0];
    uint32_t codepoint;
    int continuation;
    int k;

    if ((lead & 0x80) == 0) {
        return lead;                      /* 0xxxxxxx: plain ASCII */
    } else if ((lead & 0xe0) == 0xc0) {   /* 110xxxxx */
        continuation = 1;
        codepoint = lead & 0x1f;
    } else if ((lead & 0xf0) == 0xe0) {   /* 1110xxxx */
        continuation = 2;
        codepoint = lead & 0x0f;
    } else if ((lead & 0xf8) == 0xf0) {   /* 11110xxx */
        continuation = 3;
        codepoint = lead & 0x07;
    } else if ((lead & 0xfc) == 0xf8) {   /* 111110xx */
        continuation = 4;
        codepoint = lead & 0x03;
    } else if ((lead & 0xfe) == 0xfc) {   /* 1111110x: mask must be 0xfe */
        continuation = 5;
        codepoint = lead & 0x01;
    } else {
        return 0;                         /* stray continuation byte, 0xfe or 0xff */
    }

    /*
     * Stop at the first byte that is not a continuation byte. A NUL fails
     * this test, so the loop never reads past the end of the string.
     */
    for (k = 1; k <= continuation; k++) {
        if ((p[k] & 0xc0) != 0x80) {
            return 0;
        }
        codepoint = (codepoint << 6) | (uint32_t)(p[k] & 0x3f);
    }
    return codepoint;
}
/*
 * Length in bytes of the encoded character at the iterator's current
 * position.
 *
 * The lead byte selects the expected length (1 to 6 bytes) and every
 * following byte must be a continuation byte of the form 10xxxxxx.
 *
 * Returns the length (1..6), or -1 when the bytes do not form a recognized
 * sequence. The terminating NUL counts as a 1-byte character.
 */
static int
_next_offset(const utf8iterator* iter) {
    /* work on unsigned bytes so the bit tests do not depend on char's sign */
    const unsigned char* p =
        (const unsigned char*)iter->txt + iter->byteIndex;
    const unsigned lead = p[0];
    int length;
    int k;

    if ((lead & 0x80) == 0) {
        return 1;                         /* 0xxxxxxx */
    } else if ((lead & 0xe0) == 0xc0) {   /* 110xxxxx */
        length = 2;
    } else if ((lead & 0xf0) == 0xe0) {   /* 1110xxxx */
        length = 3;
    } else if ((lead & 0xf8) == 0xf0) {   /* 11110xxx */
        length = 4;
    } else if ((lead & 0xfc) == 0xf8) {   /* 111110xx */
        length = 5;
    } else if ((lead & 0xfe) == 0xfc) {   /* 1111110x: mask must be 0xfe */
        length = 6;
    } else {
        return -1;
    }

    /*
     * Stop at the first byte that is not a continuation byte. A NUL fails
     * this test, so the loop never reads past the end of the string.
     */
    for (k = 1; k < length; k++) {
        if ((p[k] & 0xc0) != 0x80) {
            return -1;
        }
    }
    return length;
}
/*
 * Move the iterator forward by one encoded character.
 *
 * Returns the new logical (character) index on success. Returns -1 and
 * leaves the iterator untouched when it already sits on the terminating
 * NUL, or when _next_offset() does not recognize the bytes at the current
 * position.
 */
int
utf8Advance(utf8iterator* iter) {
    int step;

    /*
     * _next_offset() treats the NUL as an ordinary 1-byte character;
     * stepping over it would leave the iterator pointing past the string.
     */
    if (iter->txt[iter->byteIndex] == '\0') {
        return -1;
    }

    step = _next_offset(iter);
    if (step <= 0) {
        return -1;
    }
    iter->byteIndex += step;
    return ++iter->logicalIndex;
}
/*
 * Copy the encoded bytes of the current character into dest as a
 * NUL-terminated string.
 *
 * iter:    iterator whose current character is copied
 * bufsize: size of dest in bytes; must hold the character plus a NUL
 * dest:    destination buffer; on success all bufsize bytes are zeroed
 *          before the character is copied in
 *
 * Returns 1 on success. Returns 0 without touching dest when dest is too
 * small or when _next_offset() reports an unrecognized sequence.
 */
int
utf8CopyEncodedCharAt(const utf8iterator* iter, size_t bufsize, char* dest) {
    /*
     * Keep the length signed: storing a -1 error in a size_t would wrap to
     * SIZE_MAX, slip past the size check and reach memcpy.
     */
    int length = _next_offset(iter);

    if (length < 0) {
        return 0;
    }
    if ((size_t)length + 1 > bufsize) {
        return 0;
    }
    memset(dest, 0, bufsize);
    memcpy(dest, iter->txt + iter->byteIndex, (size_t)length);
    return 1;
}
/*
 * Rewind the iterator to the first character of its string: both the byte
 * offset and the character index go back to 0. The string pointer is kept.
 */
void
utf8ResetIterator(utf8iterator* iter) {
    iter->byteIndex = 0;
    iter->logicalIndex = 0;
}
/*
 * Release an iterator obtained from utf8NewIterator().
 * Only the iterator itself is freed; the string it points at is untouched.
 */
void
utf8FreeIterator(utf8iterator* iter) {
free(iter);
}

19
src/utf8.h Normal file
View File

@@ -0,0 +1,19 @@
#ifndef _UTF8_H
#define _UTF8_H
#include <stddef.h> /* size_t, used by utf8CopyEncodedCharAt() */
#include <stdint.h> /* uint32_t */

/* Cursor over a NUL-terminated UTF-8 string. */
typedef struct {
    const char* txt;  /* string being walked; not owned by the iterator */
    int byteIndex;    /* byte offset of the current character within txt */
    int logicalIndex; /* number of characters advanced past so far */
} utf8iterator;

/* Allocate an iterator positioned at the start of txt (txt is not copied). */
utf8iterator* utf8NewIterator(const char* txt);

/*
 * Step to the next character. Returns the new character index, or -1 if
 * the iterator cannot advance from its current position.
 */
int utf8Advance(utf8iterator* iter);

/*
 * Decode the character at the current position. Returns its code point, or
 * 0 if the bytes there are not a recognized sequence (0 is also what the
 * terminating NUL decodes to).
 */
uint32_t utf8CharAt(const utf8iterator* iter);

/* Move the iterator back to the first character. */
void utf8ResetIterator(utf8iterator* iter);

/* Free the iterator itself; the string it refers to is left alone. */
void utf8FreeIterator(utf8iterator* iter);

/*
 * Copy the encoded bytes of the current character into dest as a
 * NUL-terminated string. Returns 1 on success, 0 if bufsize is too small to
 * hold the character plus the terminator.
 */
int utf8CopyEncodedCharAt(const utf8iterator* iter, size_t bufsize, char* dest);
#endif /* !def _UTF8_H */

99
src/xml.c Normal file
View File

@@ -0,0 +1,99 @@
#include <libxml/tree.h>
#include "sermon.h"
/*
 * Append <headerName>headerText</headerName> as the last child of
 * headerNode. Nothing is added when headerText is NULL, so header fields
 * that were not set are simply left out.
 */
static void
appendHeaderNode(xmlNodePtr headerNode, const char* headerName,
                 const char* headerText) {
    xmlNodePtr element;

    if (!headerText) {
        return;
    }

    element = xmlNewNode(NULL, headerName);
    xmlAddChild(element, xmlNewText(headerText));
    xmlAddChild(headerNode, element);
}
/*
 * Build the <header> element of a sermon.
 *
 * One child element is emitted per header field that is set, in the order
 * title, author, occasion, date, text; fields that are NULL are skipped.
 * Returns the new, unattached <header> node.
 */
xmlNodePtr
sermonHeader(const Sermon* srm) {
    /* element name paired with the sermon field that fills it */
    const struct {
        const char* tag;
        const char* value;
    } fields[] = {
        { "title",    srm->sermonTitle },
        { "author",   srm->sermonAuthor },
        { "occasion", srm->sermonOccasion },
        { "date",     srm->sermonDate },
        { "text",     srm->sermonText },
    };
    xmlNodePtr header = xmlNewNode(NULL, "header");
    size_t k;

    for (k = 0; k < sizeof fields / sizeof fields[0]; k++) {
        appendHeaderNode(header, fields[k].tag, fields[k].value);
    }
    return header;
}
/*
 * Wrap a paragraph's text in a new, unattached <p> element and return it.
 * Only paraText is used here; the paragraph type is handled by the caller.
 */
static xmlNodePtr
paragraphToXML(const SermonParagraph* p) {
    xmlNodePtr element = xmlNewNode(NULL, "p");
    xmlNodePtr content = xmlNewText(p->paraText);

    xmlAddChild(element, content);
    return element;
}
/*
 * Build the <body> element from the sermon's paragraphs.
 *
 * Paragraphs are emitted in order as <p> elements. Each run of consecutive
 * PARA_BLOCKQUOTE paragraphs is grouped under a single <quote> element; any
 * other paragraph type ends the run and is attached directly to <body>.
 * Returns the new, unattached <body> node.
 */
xmlNodePtr
sermonBody(const Sermon* srm) {
    xmlNodePtr body = xmlNewNode(NULL, "body");
    xmlNodePtr openQuote = NULL; /* <quote> of the current run, if any */
    int idx;

    for (idx = 0; idx < srm->numParagraphs; idx++) {
        const SermonParagraph* current = srm->sermonParagraphs + idx;
        xmlNodePtr node = paragraphToXML(current);
        xmlNodePtr parent;

        if (current->paraType != PARA_BLOCKQUOTE) {
            /* an ordinary paragraph closes any open quotation run */
            openQuote = NULL;
            parent = body;
        } else {
            if (openQuote == NULL) {
                /* first quoted paragraph of a run: open a new <quote> */
                openQuote = xmlNewNode(NULL, "quote");
                xmlAddChild(body, openQuote);
            }
            parent = openQuote;
        }
        xmlAddChild(parent, node);
    }
    return body;
}
/*
 * Build the <footer> element holding the sermon's references.
 *
 * Each reference becomes <ref number="N"> wrapping a <p> with the reference
 * text, where N is the reference's 1-based position in the list.
 * Returns the new, unattached <footer> node.
 */
xmlNodePtr
sermonFooter(const Sermon* srm) {
    xmlNodePtr footer = xmlNewNode(NULL, "footer");
    char num[10]; /* decimal text for the "number" attribute */
    int idx;

    for (idx = 0; idx < srm->numReferences; idx++) {
        const SermonReference* entry = srm->sermonReferences + idx;
        xmlNodePtr ref = xmlNewNode(NULL, "ref");
        int ordinal = idx + 1;

        snprintf(num, sizeof num, "%d", ordinal);
        xmlNewProp(ref, "number", num);
        xmlAddChild(ref, paragraphToXML(&entry->refText));
        xmlAddChild(footer, ref);
    }
    return footer;
}
/*
 * Convert a parsed sermon into an XML document.
 *
 * The document gets an internal DTD subset named "sermon" whose system
 * identifier is DATADIR "/sermon.dtd", and a <sermon> root element that
 * declares the default namespace "urn:david-sermon". The root contains the
 * header, then the body, then a footer if the sermon has any references.
 *
 * Returns the new document, which the caller owns and must release with
 * xmlFreeDoc(). Returns NULL if the document itself cannot be allocated.
 * If the root element cannot be allocated, the document is returned without
 * a root element.
 */
xmlDocPtr
sermonToXmlDoc(const Sermon* srm) {
    /* document creation and setup */
    xmlDocPtr document = xmlNewDoc("1.0");
    xmlNodePtr sermon;

    if (!document) {
        /* without a document, anything built below would have no owner */
        return NULL;
    }
    xmlCreateIntSubset(document, "sermon", NULL, DATADIR "/sermon.dtd");

    sermon = xmlNewNode(NULL, "sermon");
    if (!sermon) {
        /* no root to hang children on; do not build subtrees that would leak */
        return document;
    }
    /*
     * NOTE(review): the namespace is only declared on the root; it is not
     * assigned to any node with xmlSetNs() -- confirm that is intended.
     */
    xmlNewNs(sermon, "urn:david-sermon", NULL);
    xmlDocSetRootElement(document, sermon);

    /* add header */
    xmlAddChild(sermon, sermonHeader(srm));

    /* add body paragraphs */
    xmlAddChild(sermon, sermonBody(srm));

    if (srm->numReferences) {
        /* add footer */
        xmlAddChild(sermon, sermonFooter(srm));
    }
    return document;
}
/*
 * Serialize the document as UTF-8 to file descriptor 1 (standard output).
 *
 * The result of xmlSaveFileTo() is not checked, so write failures go
 * unreported.
 * NOTE(review): per the libxml2 documentation xmlSaveFileTo() closes the
 * output buffer itself, which would be why it is not closed here -- confirm.
 */
void
printXML(xmlDocPtr document) {
    xmlOutputBufferPtr out =
        xmlOutputBufferCreateFd(1, xmlFindCharEncodingHandler("utf-8"));

    xmlSaveFileTo(out, document, "utf-8");
}

7
src/xml.h Normal file
View File

@@ -0,0 +1,7 @@
#ifndef _XML_H
#define _XML_H
/*
 * XML output for parsed sermons.
 *
 * This header includes nothing itself: the including file must already have
 * pulled in the declarations of xmlDocPtr (main.c gets it from
 * <libxml/tree.h>) and Sermon (from "sermon.h") before including it.
 */

/*
 * Build an XML document from the sermon: a <sermon> root holding the header,
 * the body and, when the sermon has references, a footer.
 */
xmlDocPtr sermonToXmlDoc(const Sermon*);

/* Write the document as UTF-8 to file descriptor 1 (standard output). */
void printXML(xmlDocPtr);
#endif /* !def _XML_H */