Use stylesheet transformations to output HTML, for example

2015-08-11 15:33:08 -04:00
parent cfc0ba7e9a
commit 10927bee25
19 changed files with 725 additions and 92 deletions
--- a/data/bootstrap.xsl
+++ b/data/bootstrap.xsl
@@ -76,6 +76,7 @@
   <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
     <head>
       <title><xsl:value-of select="ser:header/ser:title"/></title>
+       <meta charset="utf-8" />
     </head>
     <body>
       <div id="header">
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,8 +1,21 @@
 AM_CPPFLAGS = ${libxml2_CFLAGS} ${libxslt_CFLAGS} -DDATADIR=\"$(pkgdatadir)\"
 bin_PROGRAMS = sermon
-BUILT_SOURCES = sermon_lexer.c sermon_parser.c sermon_parser.h
+BUILT_SOURCES = sermon_lexer.c \
+				sermon_parser.c \
+				sermon_parser.h
 AM_YFLAGS = -d --location
-sermon_SOURCES = sermon_lexer.l sermon_parser.y sermon_util.c main.c xml.c
+sermon_SOURCES = citations.c \
+				 format.c \
+				 main.c \
+				 options.c \
+				 sermon_lexer.l \
+				 sermon_parser.y \
+				 sermon_util.c \
+				 utf8.c \
+				 xml.c \
+				 xslt.c
 sermon_LDADD = ${libxml2_LIBS} ${libxslt_LIBS}
-CLEANFILES = sermon_lexer.c sermon_parser.c sermon_parser.h
+CLEANFILES = sermon_lexer.c \
+			 sermon_parser.c \
+			 sermon_parser.h
 LIBS = $(LEXLIB)
--- a/src/citations.c
+++ b/src/citations.c
@@ -0,0 +1,20 @@
+#include <string.h>
+#include "citations.h"
+
+/*
+ * addCitation - record a newly seen citation at the tail of the queue.
+ *
+ * q     - citation queue to extend
+ * refId - citation identifier; the queue stores its own strdup'd copy, so
+ *         the caller keeps ownership of the string passed in
+ *
+ * The record's refNum is the queue length after insertion, so numbers are
+ * handed out 1, 2, 3, ... in order of insertion.  Exits the program if the
+ * identifier cannot be copied, matching what APPEND_QUEUE does when it runs
+ * out of memory.
+ */
+void
+addCitation(CitationRecordQueue* q, const char* refId) {
+    CitationRecord rec = { .refNum = QUEUE_LENGTH(*q) + 1, .refId = strdup(refId) };
+    if (!rec.refId) {
+        /* a NULL refId would crash the strcmp in lookupCitation later on */
+        perror("Could not allocate space for citation id.");
+        exit(1);
+    }
+    APPEND_QUEUE(CitationRecordQueue, *q, rec);
+}
+
+/*
+ * lookupCitation - find the number recorded for a citation identifier.
+ *
+ * q     - citation queue to search (passed by value; it is only read here)
+ * refId - identifier to look for
+ *
+ * Returns the refNum stored with the first record whose refId compares equal
+ * to refId, or 0 when no record matches.
+ */
+int
+lookupCitation(CitationRecordQueue q, const char* refId) {
+    FOREACH_QUEUE(CitationRecordQueue, q, ptr) {
+        if (strcmp(ptr->data.refId, refId) == 0) {
+            return ptr->data.refNum;
+        }
+    }
+    FOREACH_QUEUE_END
+    /* no record carries this id */
+    return 0;
+}
--- a/src/citations.h
+++ b/src/citations.h
@@ -0,0 +1,30 @@
+/***************************************************************************
+ * citations.h - structures and routines for tracking sermon citations
+ *
+ * Copyright (C) 2015 by David A. Baer, All Rights Reserved
+ *
+ * Description:
+ *
+ *    In order to have footnotes/endnotes appear in the proper sequence,
+ *    we need to keep track of the order of the citations.  Current
+ *    implementation involves a list which is searched in linear time,
+ *    just because the number of citations is bound to be low enough
+ *    not to notice any performance improvement from say, a hash table 
+ *    or a splay tree.
+ *
+ */
+#ifndef _CITATIONS_H
+#define _CITATIONS_H
+
+#include "queue.h"
+
+/* One citation, as stored in a CitationRecordQueue. */
+typedef struct {
+    int   refNum;   /* number assigned to the citation when it was added */
+    char* refId;    /* identifier string the citation is looked up by */
+} CitationRecord;
+
+DEFINE_QUEUE(CitationRecord, CitationRecordQueue);
+/* Append a record for refId to the queue pointed to by q. */
+void addCitation(CitationRecordQueue* q, const char* refId);
+/* Return the number recorded for refId in q, or 0 if it is not present. */
+int lookupCitation(CitationRecordQueue q, const char* refId);
+
+#endif /* !def _CITATIONS_H */
--- a/src/format.c
+++ b/src/format.c
@@ -0,0 +1,208 @@
+#include <string.h>
+#include "queue.h"
+#include "stack.h"
+#include "utf8.h"
+#include "format.h"
+
+DEFINE_QUEUE(FormatElement, FormatElementQueue);
+
+#define EM_DASH_UTF8 "\xe2\x80\x94"
+
+/* A tokenizer is simply a UTF-8 iterator positioned in the text being scanned. */
+typedef utf8iterator* Tokenizer;
+/* Kinds of token produced by nextToken. */
+typedef enum {
+    TOK_TEXT,       /* run of Latin-range text */
+    TOK_GREEK,      /* run that starts with a Greek character */
+    TOK_UNICODE,    /* run that starts with any other non-Latin character */
+    TOK_STAR,       /* a single '*' */
+    TOK_REF,        /* a ^{id} marker */
+    /*
+    TOK_DASH,
+    TOK_OPEN_DOUBLE_QUOTE,
+    TOK_CLOSE_DOUBLE_QUOTE,
+    TOK_OPEN_SINGLE_QUOTE,
+    TOK_CLOSE_SINGLE_QUOTE,
+    */
+    TOK_EOF         /* end of the input string */
+} TokenType;
+/* One scanned token. */
+typedef struct {
+    TokenType toktype;
+    char* toktext;  /* strndup'd token bytes; NULL for TOK_STAR and TOK_EOF */
+} Token;
+
+/* Create a tokenizer over txt; it is backed by a new UTF-8 iterator. */
+static Tokenizer
+initializeTokenizer(const char* txt) {
+    Tokenizer fresh = utf8NewIterator(txt);
+    return fresh;
+}
+
+/* Dispose of a tokenizer by handing its iterator to utf8FreeIterator. */
+static void freeTokenizer(utf8iterator* iter)
+{
+    utf8FreeIterator(iter);
+}
+
+/*
+ * greekChar - nonzero if ch lies in U+0370..U+03FF or U+1F00..U+1FFF
+ * (the "Greek and Coptic" and "Greek Extended" Unicode blocks).
+ *
+ * Declared static: a plain `inline` definition provides no external
+ * definition in C99 and later, so a call the compiler chooses not to inline
+ * would fail to link.
+ */
+static inline int
+greekChar(uint32_t ch) {
+    return (((0x370 <= ch) && (ch <= 0x3ff)) ||
+            ((0x1f00 <= ch) && (ch <= 0x1fff)));
+}
+
+/*
+ * extendedPunctuation - nonzero if ch lies in U+2000..U+206F (the "General
+ * Punctuation" Unicode block).
+ *
+ * Declared static: a plain `inline` definition provides no external
+ * definition in C99 and later, so a call the compiler chooses not to inline
+ * would fail to link.
+ */
+static inline int
+extendedPunctuation(uint32_t ch) {
+    return ((0x2000 <= ch) && (ch <= 0x206f));
+}
+
+/*
+ * latinChar - nonzero if ch is at most U+00FF or is accepted by
+ * extendedPunctuation.
+ *
+ * Declared static: a plain `inline` definition provides no external
+ * definition in C99 and later, so a call the compiler chooses not to inline
+ * would fail to link.
+ */
+static inline int
+latinChar(uint32_t ch) {
+    return (ch <= 0xff) || extendedPunctuation(ch);
+}
+
+/*
+ * nextToken - scan one token starting at the tokenizer's current position.
+ *
+ * The tokenizer is advanced past the token.  The kind of token is decided by
+ * the character at the current position:
+ *   NUL          -> TOK_EOF (the tokenizer is not advanced)
+ *   '*'          -> TOK_STAR
+ *   Greek        -> TOK_GREEK: a run of Greek characters, spaces, commas and
+ *                   periods
+ *   "^{"         -> TOK_REF: the text is the id between the braces
+ *   Latin-range  -> TOK_TEXT: runs up to the next '*', "^{" or character
+ *                   rejected by latinChar
+ *   otherwise    -> TOK_UNICODE
+ *
+ * toktext is NULL for TOK_EOF and TOK_STAR; for every other kind it is a
+ * strndup'd copy of the token's bytes.
+ */
+static Token
+nextToken(Tokenizer tokenizer) {
+    int startIndex = tokenizer->byteIndex;
+    uint32_t ch = utf8CharAt(tokenizer);
+    Token result;
+    memset(&result, 0, sizeof(result));
+    if (ch == '\0') {
+        result.toktype = TOK_EOF;
+        result.toktext = NULL;
+        return result;
+    } else if (ch == '*') {
+        utf8Advance(tokenizer);
+        result.toktype = TOK_STAR;
+        result.toktext = NULL;
+        return result;
+    } else if (greekChar(ch)) {
+        /* spaces, commas and periods inside a Greek run stay part of it */
+        while ((ch != 0) &&
+               (greekChar(ch) || (ch == ' ') || (ch == ',') || (ch == '.'))) {
+            utf8Advance(tokenizer);
+            ch = utf8CharAt(tokenizer);
+        }
+        result.toktype = TOK_GREEK;
+        result.toktext = strndup(tokenizer->txt + startIndex, tokenizer->byteIndex - startIndex);
+        return result;
+    } else if ((ch == '^') && (tokenizer->txt[tokenizer->byteIndex + 1] == '{')) {
+        /* '^' occupies one byte, so the byte after it can be inspected directly */
+        int idStart = 0, idEnd = 0;
+        utf8Advance(tokenizer); /* to { */
+        utf8Advance(tokenizer); /* to id */
+        ch = utf8CharAt(tokenizer);
+        idStart = tokenizer->byteIndex;
+        while ((ch != 0) && (ch != '}')) {
+            utf8Advance(tokenizer);
+            ch = utf8CharAt(tokenizer);
+        }
+        idEnd = tokenizer->byteIndex;
+        /* reached end-of-string or } */
+        if (ch == '}')
+            utf8Advance(tokenizer);
+        result.toktype = TOK_REF;
+        /* the id excludes the braces; an unterminated marker runs to end of input */
+        result.toktext = strndup(tokenizer->txt + idStart, idEnd - idStart);
+        return result;
+    } else if (latinChar(ch)) {
+        while ((ch != 0) && latinChar(ch) && (ch != '*')) {
+            utf8Advance(tokenizer);
+            ch = utf8CharAt(tokenizer);
+            /* stop in front of a "^{" marker so it becomes its own token */
+            if (ch == '^') {
+                if (tokenizer->txt[tokenizer->byteIndex + 1] == '{') break;
+            }
+        }
+        result.toktype = TOK_TEXT;
+        result.toktext = strndup(tokenizer->txt + startIndex, tokenizer->byteIndex - startIndex);
+        return result;
+    } else {
+        /* first character is neither Latin-range nor Greek: keep going while
+           characters are non-Latin, or are a space, comma, period or
+           extended punctuation */
+        while ((ch != 0) && (!latinChar(ch) || (ch == ' ') || (ch == ',') || (ch == '.') || extendedPunctuation(ch))) {
+            utf8Advance(tokenizer);
+            ch = utf8CharAt(tokenizer);
+        }
+        result.toktype = TOK_UNICODE;
+        result.toktext = strndup(tokenizer->txt + startIndex, tokenizer->byteIndex - startIndex);
+        return result;
+    }
+}
+
+DEFINE_STACK(FormatElementQueue, FEQueueStack);
+
+/*
+ * closeEmphasis - finish the emphasis span currently being collected.
+ *
+ * Copies the elements gathered in *q into an array owned by a new FORMAT_EM
+ * element, releases the queue's nodes, restores the enclosing queue saved on
+ * top of *st and appends the new element to it.  *st must be non-empty.
+ */
+static void
+closeEmphasis(FormatElementQueue* q, FEQueueStack* st) {
+    FormatElement* content = NULL;
+    FormatElement elt = { .elementType = FORMAT_EM };
+    QUEUE_TO_ARRAY(FormatElementQueue, (*q), FormatElement, content);
+    elt.elementContentLength = QUEUE_LENGTH(*q);
+    elt.elementContent.nestedContent = content;
+    /* the elements now live in `content`; free the nodes before the queue
+       variable is overwritten, otherwise they are leaked */
+    DESTROY_QUEUE(FormatElementQueue, (*q));
+    (*q) = STACK_HEAD(*st);
+    APPEND_QUEUE(FormatElementQueue, (*q), elt);
+    POP_STACK(FEQueueStack, (*st));
+}
+
+/*
+ * formatText - split a piece of text into an array of format elements.
+ *
+ * txt          - NUL-terminated UTF-8 text to scan
+ * dst          - receives a calloc'd array of elements; release it with
+ *                freeFormatElementArray
+ * citationQPtr - if non-NULL, every ^{id} not yet present in this queue is
+ *                appended to it; pass NULL to skip citation tracking
+ *
+ * '*' toggles emphasis: the elements between a pair of stars are collected
+ * into a nested array held by a single FORMAT_EM element.  An unmatched '*'
+ * is closed at the end of the text.  Returns the number of elements stored
+ * in *dst.
+ */
+int formatText(const char* txt, FormatElement** dst, CitationRecordQueue* citationQPtr) {
+    Tokenizer tokenizer = initializeTokenizer(txt);
+    Token tok;
+    int listLength = 0, em = 0;
+    NEW_QUEUE(FormatElementQueue, formatElementQ);
+    NEW_STACK(FEQueueStack, st);
+    while ((tok = nextToken(tokenizer)).toktype != TOK_EOF) {
+        if (tok.toktype == TOK_STAR) {
+            if (em) { /* end emphasis */
+                closeEmphasis(&formatElementQ, &st);
+                em = 0;
+            } else { /* begin emphasis */
+                /* park the enclosing queue and collect into an empty one */
+                PUSH_STACK(FEQueueStack, st, formatElementQ);
+                REINIT_QUEUE(formatElementQ);
+                em = 1;
+            }
+        } else {
+            /* the element takes over the token's heap-allocated text */
+            FormatElement elt = { .elementContent = { .textContent = tok.toktext } };
+            switch (tok.toktype) {
+            case TOK_GREEK:
+                elt.elementType = FORMAT_GREEK;
+                break;
+            case TOK_UNICODE:
+                elt.elementType = FORMAT_UNICODE;
+                break;
+            case TOK_REF:
+                elt.elementType = FORMAT_CITATION;
+                if (citationQPtr && !lookupCitation(*citationQPtr, tok.toktext)) {
+                    addCitation(citationQPtr, tok.toktext);
+                }
+                break;
+            default: /* TOK_TEXT */
+                elt.elementType = FORMAT_TEXT;
+                break;
+            }
+            APPEND_QUEUE(FormatElementQueue, formatElementQ, elt);
+        }
+    }
+    if (em) {
+        /* unmatched star -- close the emphasis here */
+        closeEmphasis(&formatElementQ, &st);
+    }
+
+    QUEUE_TO_ARRAY(FormatElementQueue, formatElementQ, FormatElement, *dst);
+    listLength = QUEUE_LENGTH(formatElementQ);
+    DESTROY_QUEUE(FormatElementQueue, formatElementQ);
+    freeTokenizer(tokenizer);
+    return listLength;
+}
+
+/*
+ * freeFormatElementArray - release an element array and everything it owns.
+ *
+ * a      - array to free
+ * length - number of elements in a
+ *
+ * FORMAT_EM elements own a nested array, which is released recursively;
+ * every other element owns its text string.
+ */
+void freeFormatElementArray(FormatElement* a, int length) {
+    int idx = 0;
+    while (idx < length) {
+        FormatElement* elt = &a[idx++];
+        if (elt->elementType != FORMAT_EM) {
+            free(elt->elementContent.textContent);
+            continue;
+        }
+        freeFormatElementArray(elt->elementContent.nestedContent,
+                               elt->elementContentLength);
+    }
+    free(a);
+}
+
+#ifdef FORMATTER_TEST
+#include <stdio.h>
+
+/* Sample mixing Latin text, CJK characters, *emphasis*, Greek and a ^{citation}. */
+const char* str = "My name in Chinese is \xe7\x86\x8a\xe5\xa4\xa7\xe8\xa1\x9b, or *xiao da wei*.  My favorite Greek passage is \xe1\xbc\x90\xce\xbd \xe1\xbc\x80\xcf\x81\xcf\x87\xe1\xbf\x87 \xe1\xbc\xa6\xce\xbd \xe1\xbd\x81 \xce\xbb\xe1\xbd\xb9\xce\xb3\xce\xbf\xcf\x82.^{cite}";
+
+/*
+ * Smoke test for the formatter (built only with -DFORMATTER_TEST): format
+ * the sample string, report how many top-level elements came back, and
+ * release them.
+ */
+int
+main(void) {
+    FormatElement* lst = NULL;
+    /* formatText takes three arguments; NULL means "do not track citations" */
+    int l = formatText(str, &lst, NULL);
+    printf("%d top-level elements\n", l);
+    freeFormatElementArray(lst, l);
+    return 0;
+}
+#endif
--- a/src/format.h
+++ b/src/format.h
@@ -0,0 +1,30 @@
+#ifndef _FORMAT_H
+#define _FORMAT_H
+#include "citations.h"
+
+/* Kinds of element a formatted paragraph is made of. */
+typedef enum {
+    FORMAT_TEXT,
+    FORMAT_EM,
+    FORMAT_STRONG,
+    FORMAT_CITATION,
+    FORMAT_GREEK,
+    FORMAT_UNICODE
+} FormatElementType;
+
+/* Forward declaration so the content union can point at nested elements. */
+typedef struct FormatElement FormatElement;
+
+/* Payload of an element: a text string, or an array of nested elements. */
+typedef union {
+    char* textContent;
+    FormatElement* nestedContent;
+} FormatElementContent;
+
+/*
+ * The typedef name was already introduced above; repeating `typedef` here
+ * would be a redefinition error for compilers older than C11, so only the
+ * struct body is given.
+ */
+struct FormatElement {
+    FormatElementType elementType;
+    int elementContentLength;       /* number of entries in nestedContent */
+    FormatElementContent elementContent;
+};
+
+int formatText(const char* txt, FormatElement** dst, CitationRecordQueue* citationQPtr);
+void freeFormatElementArray(FormatElement* a, int length);
+
+#endif /* !def _FORMAT_H */
--- a/src/hash.c
+++ b/src/hash.c
@@ -0,0 +1,21 @@
+#include <stdint.h>
+
+/*
+ * powmod - compute x raised to the power y, modulo m, by repeated squaring.
+ *
+ * Intermediate products are formed in 32 bits, so the result is only exact
+ * while (m - 1) * (m - 1) and x * (m - 1) fit in a uint32_t.  m must be
+ * nonzero whenever y is nonzero.
+ */
+static uint32_t powmod(uint32_t x, uint32_t y, uint32_t m) {
+    if (y == 0) {
+        return 1;
+    } else if (y % 2 == 0) {
+        /* even exponent: square the half power */
+        uint32_t result = powmod(x, y / 2, m);
+        return (result * result) % m;
+    } else {
+        /* odd exponent: peel off one factor of x */
+        return (x * powmod(x, y - 1, m)) % m;
+    }
+}
+
+/*
+ * HASH_KEY - map a NUL-terminated string to a value in 0..1023.
+ *
+ * Each byte contributes (position + 1) * 31^byte, everything taken modulo
+ * 1024.  The empty string hashes to 0.
+ */
+uint32_t
+HASH_KEY(const char *str) {
+    uint32_t result = 0;
+    uint32_t i;
+    for (i = 0; str[i] != '\0'; i++) {
+        /* go through unsigned char so bytes >= 0x80 hash the same whether
+           plain char is signed or unsigned */
+        uint32_t byte = (unsigned char)str[i];
+        result = (result + (i + 1) * powmod(31, byte, 1024)) % 1024;
+    }
+    return result;
+}
--- a/src/hash.h
+++ b/src/hash.h
@@ -0,0 +1,36 @@
+#ifndef _HASH_H
+#define _HASH_H
+
+#include "queue.h"
+
+/* Number of buckets in every hash table. */
+#define HASH_ENTRIES 1024
+/* Map a NUL-terminated key to a bucket index; takes const so that lookups
+   on const keys (as in the Find function below) compile cleanly. */
+uint32_t HASH_KEY(const char *);
+
+/*
+ * DEFINE_HASH(T, N) - declare a string-keyed hash table type N holding T.
+ *
+ * Expands to:
+ *   N##Entry           - a key/data pair
+ *   N##CollisionQueue  - queue of entries sharing a bucket
+ *   N                  - array of HASH_ENTRIES collision queues
+ *   N##Find(tbl, key)  - returns the entry whose key equals `key`, or NULL
+ *
+ * N##Find is static so the header can be included from several source files
+ * without producing duplicate symbols.
+ */
+#define DEFINE_HASH(T, N) \
+    typedef struct { \
+        char* key; \
+        T data; \
+    } N##Entry; \
+    DEFINE_QUEUE(N##Entry, N##CollisionQueue); \
+    typedef N##CollisionQueue N[HASH_ENTRIES]; \
+    static const N##Entry* N##Find(N tbl, const char* key) { \
+        uint32_t track = HASH_KEY(key); \
+        FOREACH_QUEUE(N##CollisionQueue, tbl[track], iter) { \
+            if (strcmp(key, iter->data.key) == 0) { \
+                return &iter->data; \
+            } \
+        } \
+        FOREACH_QUEUE_END \
+        return NULL; \
+    }
+
+/*
+ * INIT_HASH(T, H) - reset every bucket of hash table H to an empty queue
+ * (length 0, head and tail NULL).  The T argument is not used by the
+ * expansion.
+ */
+#define INIT_HASH(T, H) { \
+    int i; \
+    for (i = 0; i < HASH_ENTRIES; i++) { \
+        (H)[i].length = 0; \
+        (H)[i].tail = (H)[i].head = NULL; \
+    } \
+}
+
+
+
+#endif /* !def _HASH_H */
--- a/src/main.c
+++ b/src/main.c
@@ -2,73 +2,38 @@
 #include <stdlib.h>
 #include <string.h>
 #include <libxml/tree.h>
+#include "options.h"
 #include "sermon.h"
 #include "xml.h"
+#include "xslt.h"

 extern int yyparse(Sermon *);
 extern FILE* yyin;

-void usage(const char* progname) {
-    fprintf(stderr, "Usage: %s [-h] FILE\n"
-            "\n"
-            "    -h           Display help message\n"
-            "\n"
-            "    FILE         sermon file to scan\n", progname);
-}
-
 int main(int argc, char* argv[]) {
    Sermon sermon;
-    xmlDocPtr document;
+    xmlDocPtr document, transformed;
    int i = 0, block = 0, normal = 0;
-    const char* progname = argv[0], *filename = NULL;
-    while (++i < argc) {
-        if (strcmp(argv[i], "-h") == 0) { usage(progname); exit(0); }
-        else if ((argv[i][0] == '-') && (argv[i][1] != '\0')) {
-            fprintf(stderr, "Unknown option: %s\n", argv[i]);
-        }
-        else {
-            filename = argv[i];
-        }
-    }
-    if (!filename) {
-        usage(progname);
-        exit(1);
-    }
+    InitOptions(argc, argv);
    InitSermon(&sermon);
-    if (strcmp(filename, "-") == 0) {
+    if (strcmp(options.inputFileName, "-") == 0) {
        yyin = stdin;
    } else {
-        yyin = fopen(argv[1], "rt");
+        yyin = fopen(options.inputFileName, "rt");
    }
    yyparse(&sermon);
-    /*
-    printf("Parsed sermon.\n");
-    printf("TITLE=%s\n", sermon.sermonTitle ? sermon.sermonTitle : "none");
-    printf("AUTHOR=%s\n", sermon.sermonAuthor ? sermon.sermonAuthor : "none");
-    printf("DATE=%s\n", sermon.sermonDate ? sermon.sermonDate : "none");
-    printf("OCCASION=%s\n", sermon.sermonOccasion ? sermon.sermonOccasion : "none");
-    printf("TEXT=%s\n", sermon.sermonText ? sermon.sermonText : "none");
-    printf("\nThere are %d paragraphs", sermon.numParagraphs);
-    for (i = 0; i < sermon.numParagraphs; i++) {
-        if (sermon.sermonParagraphs[i].paraType == PARA_DEFAULT) normal++;
-        else if (sermon.sermonParagraphs[i].paraType == PARA_BLOCKQUOTE) block++;
-    }
-    printf(" (%d regular, %d blockquote)\n", normal, block);
-    printf("\nThere are %d references.\n", sermon.numReferences);
-    for (i = 0; i < sermon.numReferences; i++) {
-        printf("    - %s: %s\n", sermon.sermonReferences[i].refId, sermon.sermonReferences[i].refText);
-    }
-    printf("\n");
-    */

    document = sermonToXmlDoc(&sermon);
-    printXML(document);
+    transformed = applyStyleSheet(document, "html5");
+    printXML(transformed);
    xmlFreeDoc(document);
+    xmlFreeDoc(transformed);

    /* clean up, clean up, everybody, everywhere! */
    FreeSermon(&sermon);
-    if (strcmp(filename, "-") != 0) {
+    if (strcmp(options.inputFileName, "-") != 0) {
        fclose(yyin);
    }
+    FreeOptions();
 }

--- a/src/options.c
+++ b/src/options.c
@@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <libgen.h>
+#include "options.h"
+
+Options options = { .progname = NULL, .datadir = DATADIR };
+
+/*
+ * datadir - locate the directory holding the program's data files.
+ *
+ * progname - path the program was invoked as
+ *
+ * Tries the compiled-in DATADIR first, then "<directory of progname>/../data".
+ * Returns a malloc'd string the caller must free, or NULL if neither
+ * candidate is an existing directory (or memory runs out).
+ */
+char*
+datadir(const char* progname) {
+    struct stat sb;
+    char resolved[PATH_MAX];
+    char* progcopy = NULL;
+    char* candidate = NULL;
+    char* found = NULL;
+    const char* progdir = NULL;
+    size_t len = 0;
+
+    if ((stat(DATADIR, &sb) == 0) && S_ISDIR(sb.st_mode)) {
+        return strdup(DATADIR);
+    }
+
+    /* dirname may modify its argument, so hand it a private copy */
+    progcopy = strdup(progname);
+    if (!progcopy) {
+        return NULL;
+    }
+    progdir = dirname(progcopy);
+
+    len = strlen(progdir) + sizeof("/../data");   /* sizeof counts the NUL */
+    candidate = malloc(len);
+    if (candidate) {
+        snprintf(candidate, len, "%s/../data", progdir);
+        /* realpath returns NULL on failure and then leaves `resolved`
+           unspecified, so only inspect it on success */
+        if (realpath(candidate, resolved) &&
+            (stat(resolved, &sb) == 0) && S_ISDIR(sb.st_mode)) {
+            found = strdup(resolved);
+        }
+    }
+
+    free(candidate);
+    free(progcopy);
+    return found;
+}
+
+/* Write the command-line synopsis to stderr, naming the program as progname. */
+static void usage(const char* progname) {
+    fprintf(stderr, "Usage: %s [-h] FILE\n", progname);
+    fputs("\n"
+          "    -h           Display help message\n"
+          "\n"
+          "    FILE         sermon file to scan (\"-\" for stdin)\n",
+          stderr);
+}
+
+/*
+ * InitOptions - fill the global `options` from the command line.
+ *
+ * argc, argv - the arguments main received
+ *
+ * "-h" prints the usage text and exits with status 0.  Any other argument
+ * starting with '-' (except a lone "-") is reported as unknown and ignored.
+ * The last remaining argument becomes options.inputFileName; if there is
+ * none, the usage text is printed and the program exits with status 1.
+ */
+void InitOptions(int argc, const char* argv[]) {
+    int i = 0;
+    options.progname = argv[0];
+    options.datadir = datadir(options.progname);
+    if (!options.datadir) {
+        /* no data directory was found; keep the compiled-in path so later
+           lookups do not dereference NULL.  Duplicated because FreeOptions
+           frees this pointer. */
+        options.datadir = strdup(DATADIR);
+    }
+    while (++i < argc) {
+        const char* arg = argv[i];
+        if (strcmp(arg, "-h") == 0) {
+            usage(options.progname);
+            exit(0);
+        } else if ((arg[0] == '-') && (arg[1] != '\0')) {
+            fprintf(stderr, "Unknown option: %s\n", arg);
+        } else {
+            /* a file name, or a lone "-" */
+            options.inputFileName = arg;
+        }
+    }
+
+    /* input filename required */
+    if (!options.inputFileName) {
+        usage(options.progname);
+        exit(1);
+    }
+}
+
+/*
+ * OptionsDataFile - build the path of a file inside the data directory.
+ *
+ * fname - file name relative to the data directory
+ *
+ * Returns a malloc'd string the caller must free: the canonical path when
+ * realpath can resolve it, otherwise the plain "<datadir>/<fname>" so the
+ * caller can still report which file is missing.  Returns NULL only if
+ * memory cannot be allocated.
+ */
+char*
+OptionsDataFile(const char* fname) {
+    /* fall back to the compiled-in directory if none was recorded */
+    const char* dir = options.datadir ? options.datadir : DATADIR;
+    char resolved[PATH_MAX];
+    size_t len = strlen(dir) + strlen(fname) + 2;   /* '/' and NUL */
+    char* joined = malloc(len);
+
+    if (!joined) {
+        return NULL;
+    }
+    snprintf(joined, len, "%s/%s", dir, fname);
+    /* realpath returns NULL on failure and then leaves `resolved`
+       unspecified, so it must not be copied in that case */
+    if (!realpath(joined, resolved)) {
+        return joined;
+    }
+    free(joined);
+    return strdup(resolved);
+}
+
+/* Release the data directory string held by the global options. */
+void FreeOptions() {
+    free(options.datadir);
+    /* clear the pointer so a second call, or a later read, cannot touch
+       freed memory */
+    options.datadir = NULL;
+}
--- a/src/options.h
+++ b/src/options.h
@@ -0,0 +1,16 @@
+#ifndef _OPTIONS_H
+#define _OPTIONS_H
+
+/* Program-wide settings. */
+typedef struct {
+    const char* progname;       /* name the program was invoked as */
+    char* datadir;              /* directory holding data files */
+    const char* inputFileName;  /* sermon file to read */
+} Options;
+
+/* The single global instance, defined in options.c. */
+extern Options options;
+
+/* Fill `options` from the command line. */
+void InitOptions(int argc, const char* argv[]);
+/* Return the path of fname inside the data directory. */
+char* OptionsDataFile(const char* fname);
+/* Release what `options` holds. */
+void FreeOptions();
+
+#endif /* !def _OPTIONS_H */
--- a/src/queue.h
+++ b/src/queue.h
@@ -30,6 +30,7 @@
 */
 #ifndef _QUEUE_H
 #define _QUEUE_H
+#include <stdio.h>
 #include <stdlib.h>

 #define DEFINE_QUEUE(T, N) \
@@ -45,6 +46,13 @@ typedef struct { \
 #define NEW_QUEUE(T, N) \
    T N = (T) { .length = 0, .head = NULL, .tail = NULL }

+/* WARNING: this is probably not what you want -- see DESTROY_QUEUE below */
+#define REINIT_QUEUE(N) { \
+    (N).length = 0; \
+    (N).head = NULL; \
+    (N).tail = NULL; \
+}
+
 #define APPEND_QUEUE(T, Q, E) { \
    struct _##T##Node* n = (struct _##T##Node*)malloc(sizeof(struct _##T##Node)); \
    if (!n) { perror ("Could not allocate space for new queue element."); exit(1); } \
@@ -56,7 +64,7 @@ typedef struct { \
    (Q).length++; \
 }

-#define QUEUE_LENGTH(Q) Q.length
+#define QUEUE_LENGTH(Q) (Q).length

 #define FOREACH_QUEUE(T, Q, N) { \
    struct _##T##Node* N = NULL; \
@@ -89,10 +97,10 @@ typedef struct { \

 #define QUEUE_TO_ARRAY(QT, Q, T, DST) { \
    int i = 0; \
-    DST = (T*)calloc(Q.length,sizeof(T)); \
-    if (!DST) { perror("Could not allocate space for array."); exit(1); } \
+    (DST) = (T*)calloc((Q).length,sizeof(T)); \
+    if (!(DST)) { perror("Could not allocate space for array."); exit(1); } \
    FOREACH_QUEUE(QT, Q, ptr) \
-        DST[i++] = ptr->data; \
+        (DST)[i++] = ptr->data; \
    FOREACH_QUEUE_END; \
 }

--- a/src/sermon.h
+++ b/src/sermon.h
@@ -1,5 +1,6 @@
 #ifndef _SERMON_H
 #define _SERMON_H
+#include "format.h"

 typedef struct {
    char* headerType;
@@ -13,7 +14,8 @@ typedef enum {

 typedef struct {
    SermonParagraphType paraType;
-    char* paraText;
+    int paraContentLength;
+    FormatElement* paraContent;
 } SermonParagraph;

 typedef struct {
--- a/src/sermon_parser.y
+++ b/src/sermon_parser.y
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "queue.h"
+#include "citations.h"
 #include "sermon.h"

 #ifdef PARSE_DEBUG
@@ -22,6 +23,7 @@ DEFINE_QUEUE(SermonReference, ReferenceQueue);
 NEW_QUEUE(LineQueue, lineQ);
 NEW_QUEUE(ParagraphQueue, paragraphQ);
 NEW_QUEUE(ReferenceQueue, referenceQ);
+NEW_QUEUE(CitationRecordQueue, citationQ);

 char* lineQueueToString(LineQueue* lq) {
    int paraLength = 0, idx = 0;
@@ -64,11 +66,27 @@ void yyerror(Sermon*, const char*);
 sermon:
      headerlist sermontext references {
          sermon->numParagraphs = QUEUE_LENGTH(paragraphQ);
-          sermon->numReferences = QUEUE_LENGTH(referenceQ);
+          sermon->numReferences = QUEUE_LENGTH(citationQ);
          if (sermon->numParagraphs) QUEUE_TO_ARRAY(ParagraphQueue, paragraphQ, SermonParagraph, sermon->sermonParagraphs);
-          if (sermon->numReferences) QUEUE_TO_ARRAY(ReferenceQueue, referenceQ, SermonReference, sermon->sermonReferences);
+          if (sermon->numReferences) {
+              sermon->sermonReferences = calloc(QUEUE_LENGTH(citationQ), sizeof(SermonReference));
+              FOREACH_QUEUE(ReferenceQueue, referenceQ, ptr) {
+                  int i = lookupCitation(citationQ, ptr->data.refId);
+                  if (i) {
+                      sermon->sermonReferences[i - 1] = ptr->data;
+                  }
+              }
+              FOREACH_QUEUE_END
+          } else {
+              sermon->sermonReferences = NULL;
+          }
          DESTROY_QUEUE(ParagraphQueue, paragraphQ);
          DESTROY_QUEUE(ReferenceQueue, referenceQ);
+          FOREACH_QUEUE(CitationRecordQueue, citationQ, ptr) {
+              free(ptr->data.refId);
+          }
+          FOREACH_QUEUE_END
+          DESTROY_QUEUE(CitationRecordQueue, citationQ);
      }
      ;
 break:
@@ -97,20 +115,26 @@ sermontext:
        ;
 block:
     para {
-         SermonParagraph p = {
-            .paraType = PARA_DEFAULT,
-            .paraText = lineQueueToString(&lineQ)
-         };
+         SermonParagraph p = { .paraType = PARA_DEFAULT };
+         char* paraText = lineQueueToString(&lineQ);
+         FormatElement* paraContent = NULL;
+         int paraContentLength = formatText(paraText, &paraContent, &citationQ);
+         p.paraContentLength = paraContentLength;
+         p.paraContent = paraContent;
         APPEND_QUEUE(ParagraphQueue, paragraphQ, p);
         PARSEPRINT("Parsed paragraph:\n%s\n\n", p.paraText);
+         free(paraText);
     }
   | blockquote {
-         SermonParagraph p = {
-             .paraType = PARA_BLOCKQUOTE,
-             .paraText = lineQueueToString(&lineQ)
-         };
+         SermonParagraph p = { .paraType = PARA_BLOCKQUOTE };
+         char* paraText = lineQueueToString(&lineQ);
+         FormatElement* paraContent = NULL;
+         int paraContentLength = formatText(paraText, &paraContent, &citationQ);
+         p.paraContentLength = paraContentLength;
+         p.paraContent = paraContent;
         APPEND_QUEUE(ParagraphQueue, paragraphQ, p);
         PARSEPRINT("Parsed paragraph:\n%s\n\n", p.paraText);
+         free(paraText);
     }
   ;
 para:
@@ -126,7 +150,14 @@ references:
        | /* empty */
        ;
 reference:
-         '{' KW_REF ':' ID ':' REFTEXT '}' { SermonReference r = { .refId = $4, .refText = { .paraType = PARA_DEFAULT, .paraText = $6 } }; APPEND_QUEUE(ReferenceQueue, referenceQ, r); }
+         '{' KW_REF ':' ID ':' REFTEXT '}' {
+             SermonReference r = { .refId = $4 };
+             SermonParagraph p = { .paraType = PARA_DEFAULT };
+             FormatElement* paraContent = NULL;
+             p.paraContentLength = formatText($6, &p.paraContent, NULL);
+             r.refText = p;
+             APPEND_QUEUE(ReferenceQueue, referenceQ, r);
+         }
        ;
 %%

--- a/src/sermon_util.c
+++ b/src/sermon_util.c
@@ -1,6 +1,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "sermon.h"
+#include "format.h"

 void InitSermon(Sermon* srm) {
    memset(srm, 0, sizeof(Sermon));
@@ -16,14 +17,20 @@ void FreeSermon(Sermon* srm) {

    if (srm->numParagraphs) {
        for (i = 0; i < srm->numParagraphs; i++) {
-            free(srm->sermonParagraphs[i].paraText);
+            freeFormatElementArray(
+                srm->sermonParagraphs[i].paraContent,
+                srm->sermonParagraphs[i].paraContentLength
+            );
        }
        free(srm->sermonParagraphs);
    }
    if (srm->numReferences) {
        for (i = 0; i < srm->numReferences; i++) {
            free(srm->sermonReferences[i].refId);
-            free(srm->sermonReferences[i].refText.paraText);
+            freeFormatElementArray(
+                srm->sermonReferences[i].refText.paraContent,
+                srm->sermonReferences[i].refText.paraContentLength
+            );
        }
        free(srm->sermonReferences);
    }
--- a/src/stack.h
+++ b/src/stack.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+ * stack.h - macros for defining a LIFO stack
+ *
+ * Copyright (C) 2015 by David A. Baer, All Rights Reserved
+ *
+ * Description:
+ *
+ *    These macros are designed to make it easier to process input whose
+ *    length is unknown before it terminates (e.g. a series of numbers),
+ *    where a last-in, first-out structure is helpful.
+ *
+ */
+#ifndef _STACK_H
+#define _STACK_H
+#include <stdlib.h>
+
+/*
+ * DEFINE_STACK(T, N) - declare a stack type N whose elements are of type T.
+ * Defines the node struct _<N>Node and the typedef N (height plus head
+ * pointer).  The expansion ends without a semicolon, so write one after the
+ * macro call.
+ */
+#define DEFINE_STACK(T, N) \
+struct _##N##Node { \
+    T data; \
+    struct _##N##Node* next; \
+};\
+typedef struct { \
+    int height; \
+    struct _##N##Node *head; \
+} N
+
+/* NEW_STACK(T, N) - define a variable N of stack type T, initially empty. */
+#define NEW_STACK(T, N) \
+    T N = (T) { .height = 0, .head = NULL }
+
+/*
+ * PUSH_STACK(T, S, E) - push a copy of E onto stack S.
+ * T is the stack's type name (it is pasted into the node struct tag).
+ * Prints a message and exits with status 1 if the node cannot be allocated.
+ */
+#define PUSH_STACK(T, S, E) { \
+    struct _##T##Node* n = (struct _##T##Node*)malloc(sizeof(struct _##T##Node)); \
+    if (!n) { perror ("Could not allocate space for new stack element."); exit(1); } \
+    n->data = E; \
+    n->next = (S).head; \
+    (S).head = n; \
+    (S).height++; \
+}
+
+/* STACK_HEIGHT(S) - number of elements on S.  The argument is parenthesized
+   so that expressions such as *ptr work. */
+#define STACK_HEIGHT(S) (S).height
+
+/* WARNING: YOU MUST VERIFY THAT STACK IS NONEMPTY */
+/* STACK_HEAD(S) - the element on top of S; dereferences the head pointer. */
+#define STACK_HEAD(S) (S).head->data
+/* POP_STACK(T, S) - unlink and free the top node of S (T is the stack's type
+   name); also dereferences the head pointer, so the warning above applies. */
+#define POP_STACK(T, S) { struct _##T##Node* tmp = (S).head->next; free((S).head); (S).head = tmp; (S).height--; }
+
+/*
+ * DESTROY_STACK(T, S) - free every node of S and leave it empty (head NULL,
+ * height 0).  T is the stack's type name.  Only the nodes are freed, not
+ * anything the stored elements point to.
+ */
+#define DESTROY_STACK(T, S) { \
+    struct _##T##Node* ptr = (S).head; \
+    while (ptr != NULL) { \
+        struct _##T##Node* tmp = ptr->next; \
+        free(ptr); \
+        ptr = tmp; \
+    } \
+    (S).head = NULL; \
+    (S).height = 0; \
+}
+        
+#endif /* !def _STACK_H */
+
--- a/src/xml.c
+++ b/src/xml.c
@@ -1,46 +1,114 @@
 #include <libxml/tree.h>
+#include <string.h>
 #include "sermon.h"

+static xmlNsPtr srmNs = NULL;
+
 static void
-appendHeaderNode(xmlNodePtr headerNode, const char* headerName,
-        const char* headerText) {
+appendHeaderNode(xmlNsPtr sermon_ns, xmlNodePtr headerNode,
+        const char* headerName, const char* headerText) {
    if (headerText) {
-        xmlNodePtr ptr = xmlNewNode(NULL, headerName);
+        xmlNodePtr ptr = xmlNewNode(sermon_ns, headerName);
        xmlAddChild(ptr, xmlNewText(headerText));
        xmlAddChild(headerNode, ptr);
    }
 }

 xmlNodePtr
-sermonHeader(const Sermon* srm) {
-    xmlNodePtr header = xmlNewNode(NULL, "header");
-    appendHeaderNode(header, "title", srm->sermonTitle);
-    appendHeaderNode(header, "author", srm->sermonAuthor);
-    appendHeaderNode(header, "occasion", srm->sermonOccasion);
-    appendHeaderNode(header, "date", srm->sermonDate);
-    appendHeaderNode(header, "text", srm->sermonText);
+sermonHeader(xmlNsPtr sermon_ns, const Sermon* srm) {
+    xmlNodePtr header = xmlNewNode(sermon_ns, "header");
+    appendHeaderNode(sermon_ns, header, "title", srm->sermonTitle);
+    appendHeaderNode(sermon_ns, header, "author", srm->sermonAuthor);
+    appendHeaderNode(sermon_ns, header, "occasion", srm->sermonOccasion);
+    appendHeaderNode(sermon_ns, header, "date", srm->sermonDate);
+    appendHeaderNode(sermon_ns, header, "text", srm->sermonText);
    return header;
 }

+/*
+ * findReferenceNumber - find the 1-based position of a reference.
+ *
+ * numReferences       - number of entries in sermonReferencesPtr
+ * sermonReferencesPtr - array of references to search
+ * refId               - identifier to look for
+ *
+ * Returns the index (counting from 1) of the first entry whose refId equals
+ * refId, or 0 if there is none.
+ */
+int
+findReferenceNumber(
+    int numReferences,
+    const SermonReference* sermonReferencesPtr,
+    const char* refId) {
+    int i;
+    for (i = 0; i < numReferences; i++) {
+        const char* candidate = sermonReferencesPtr[i].refId;
+        /* the parser callocs this array and fills only the slots of
+           references it actually found, so an entry may have a NULL id */
+        if (candidate && (strcmp(candidate, refId) == 0)) {
+            return i + 1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * formatElementsToXML - append XML for an array of format elements.
+ *
+ * sermon_ns           - namespace given to every element node created
+ * parentElement       - node the generated children are added to
+ * a, length           - the elements to convert
+ * numReferences,
+ * sermonReferencesPtr - reference list used to number citations
+ *
+ * FORMAT_TEXT becomes a text node; FORMAT_EM, FORMAT_GREEK and
+ * FORMAT_UNICODE become <em>, <greek> and <unicode> elements; a
+ * FORMAT_CITATION becomes <cite number="N"/>.  A citation whose reference
+ * cannot be found is reported on stderr and omitted.  Other element types
+ * produce no output.
+ */
+void
+formatElementsToXML(
+    xmlNsPtr sermon_ns,
+    xmlNodePtr parentElement,
+    const FormatElement* a,
+    int length,
+    int numReferences,
+    const SermonReference* sermonReferencesPtr) {
+    int i = 0;
+    for (i = 0; i < length; i++) {
+        if (a[i].elementType == FORMAT_EM) {
+            xmlNodePtr em = xmlNewNode(sermon_ns, "em");
+            formatElementsToXML(sermon_ns, em, a[i].elementContent.nestedContent, a[i].elementContentLength, numReferences, sermonReferencesPtr);
+            xmlAddChild(parentElement, em);
+        } else if (a[i].elementType == FORMAT_TEXT) {
+            xmlAddChild(parentElement, xmlNewText(a[i].elementContent.textContent));
+        } else if (a[i].elementType == FORMAT_GREEK) {
+            xmlNodePtr greek = xmlNewNode(sermon_ns, "greek");
+            xmlAddChild(greek, xmlNewText(a[i].elementContent.textContent));
+            xmlAddChild(parentElement, greek);
+        } else if (a[i].elementType == FORMAT_UNICODE) {
+            xmlNodePtr unicode = xmlNewNode(sermon_ns, "unicode");
+            xmlAddChild(unicode, xmlNewText(a[i].elementContent.textContent));
+            xmlAddChild(parentElement, unicode);
+        } else if (a[i].elementType == FORMAT_CITATION) {
+            int num = findReferenceNumber(numReferences, sermonReferencesPtr, a[i].elementContent.textContent);
+            if (num == 0) {
+                fprintf(stderr, "WARNING: reference %s not found -- omitted\n", a[i].elementContent.textContent);
+            } else {
+                /* create the node only once it is known to be attached;
+                   creating it up front leaked it on the not-found path */
+                char n[24];
+                xmlNodePtr cite = xmlNewNode(sermon_ns, "cite");
+                snprintf(n, sizeof(n), "%d", num);
+                xmlNewProp(cite, "number", n);
+                xmlAddChild(parentElement, cite);
+            }
+        }
+    }
+}

 static xmlNodePtr
-paragraphToXML(const SermonParagraph* p) {
-    xmlNodePtr result = xmlNewNode(NULL, "p");
-    xmlAddChild(result, xmlNewText(p->paraText));
+paragraphToXML(
+        xmlNsPtr sermon_ns,
+        const SermonParagraph* p,
+        int numReferences,
+        const SermonReference* sermonReferencesPtr) {
+    xmlNodePtr result = xmlNewNode(sermon_ns, "p");
+    formatElementsToXML(
+        sermon_ns,
+        result,
+        p->paraContent,
+        p->paraContentLength,
+        numReferences,
+        sermonReferencesPtr
+    );
    return result;
 }

 xmlNodePtr
-sermonBody(const Sermon* srm) {
-    xmlNodePtr body = xmlNewNode(NULL, "body");
+sermonBody(xmlNsPtr sermon_ns, const Sermon* srm) {
+    xmlNodePtr body = xmlNewNode(sermon_ns, "body");
    xmlNodePtr block = NULL;
    int i = 0;
    for (i = 0; i < srm->numParagraphs; i++) {
        const SermonParagraph* p = &srm->sermonParagraphs[i];
-        xmlNodePtr para = paragraphToXML(p);
+        xmlNodePtr para = paragraphToXML(
+            sermon_ns, p, srm->numReferences, srm->sermonReferences
+        );
        if (p->paraType == PARA_BLOCKQUOTE) {
            if (!block) {
-                block = xmlNewNode(NULL, "quote");
+                block = xmlNewNode(sermon_ns, "quote");
                xmlAddChild(body, block);
            }
            xmlAddChild(block, para);
@@ -53,16 +121,16 @@ sermonBody(const Sermon* srm) {
 }

 xmlNodePtr
-sermonFooter(const Sermon* srm) {
-    xmlNodePtr footer = xmlNewNode(NULL, "footer");
+sermonFooter(xmlNsPtr sermon_ns, const Sermon* srm) {
+    xmlNodePtr footer = xmlNewNode(sermon_ns, "footer");
    int i = 0;
    char num[10];
    for (i = 0; i < srm->numReferences; i++) {
        const SermonReference* r = &srm->sermonReferences[i];
-        xmlNodePtr ref = xmlNewNode(NULL, "ref");
+        xmlNodePtr ref = xmlNewNode(sermon_ns, "ref");
        snprintf(num, 10, "%d", i + 1);
        xmlNewProp(ref, "number", num);
-        xmlAddChild(ref, paragraphToXML(&r->refText));
+        xmlAddChild(ref, paragraphToXML(sermon_ns, &r->refText, srm->numReferences, srm->sermonReferences));
        xmlAddChild(footer, ref);
    }
    return footer;
@@ -72,20 +140,21 @@ xmlDocPtr
 sermonToXmlDoc(const Sermon* srm) {
    /* document creation and setup */
    xmlDocPtr document = xmlNewDoc("1.0");
-    xmlDtdPtr dtd = xmlCreateIntSubset(document, "sermon", NULL, DATADIR "/sermon.dtd");
+    xmlDtdPtr dtd = xmlCreateIntSubset(document, "sermon", NULL, "file://" DATADIR "/sermon.dtd");
    xmlNodePtr sermon = xmlNewNode(NULL, "sermon");
    xmlNsPtr sermon_ns = xmlNewNs(sermon, "urn:david-sermon", NULL);
    xmlDocSetRootElement(document, sermon);
+    xmlSetNs(sermon, sermon_ns);

    /* add header */
-    xmlAddChild(sermon, sermonHeader(srm));
+    xmlAddChild(sermon, sermonHeader(sermon_ns, srm));

    /* add body paragraphs */
-    xmlAddChild(sermon, sermonBody(srm));
+    xmlAddChild(sermon, sermonBody(sermon_ns, srm));

    if (srm->numReferences) {
        /* add footer */
-        xmlAddChild(sermon, sermonFooter(srm));
+        xmlAddChild(sermon, sermonFooter(sermon_ns, srm));
    }

    return document;
--- a/src/xslt.c
+++ b/src/xslt.c
@@ -0,0 +1,29 @@
+#include <string.h>
+#include <libxml/tree.h>
+#include <libxslt/xslt.h>
+#include <libxslt/transform.h>
+#include "options.h"
+
+/*
+ * applyStyleSheet - transform a document with a stylesheet from the data
+ * directory.
+ *
+ * document       - XML document to transform; it is not modified or freed
+ * styleSheetName - stylesheet name without the ".xsl" extension
+ *
+ * Returns the transformed document, which the caller frees with xmlFreeDoc,
+ * or NULL if the stylesheet path cannot be built, the stylesheet cannot be
+ * loaded, or the transformation fails.  Progress and errors are reported on
+ * stderr.
+ */
+xmlDocPtr
+applyStyleSheet(xmlDocPtr document, const char* styleSheetName) {
+    size_t len = strlen(styleSheetName) + sizeof(".xsl");   /* sizeof counts the NUL */
+    char* baseName = malloc(len);
+    char* styleSheetFileName = NULL;
+    xsltStylesheetPtr xsl = NULL;
+    xmlDocPtr res = NULL;
+
+    if (!baseName) {
+        perror("Could not allocate space for stylesheet name.");
+        return NULL;
+    }
+    snprintf(baseName, len, "%s.xsl", styleSheetName);
+    styleSheetFileName = OptionsDataFile(baseName);
+    free(baseName);
+    if (!styleSheetFileName) {
+        fprintf(stderr, "Could not build a path for stylesheet %s\n", styleSheetName);
+        return NULL;
+    }
+    fprintf(stderr, "Loading stylesheet %s ...\n", styleSheetFileName);
+
+    xmlSubstituteEntitiesDefault(1);
+    xmlLoadExtDtdDefaultValue = 1;
+    xsl = xsltParseStylesheetFile((const xmlChar*)styleSheetFileName);
+    if (xsl) {
+        res = xsltApplyStylesheet(xsl, document, NULL);
+        xsltFreeStylesheet(xsl);
+    } else {
+        /* do not hand a NULL stylesheet to the transformer */
+        fprintf(stderr, "Could not load stylesheet %s\n", styleSheetFileName);
+    }
+
+    free(styleSheetFileName);
+    return res;
+}
--- a/src/xslt.h
+++ b/src/xslt.h
@@ -0,0 +1,8 @@
+#ifndef _XSLT_H
+#define _XSLT_H
+
+#include <libxml/tree.h>
+
+/* Transform document with the stylesheet named styleSheetName and return
+   the resulting document. */
+xmlDocPtr applyStyleSheet(xmlDocPtr document, const char* styleSheetName);
+
+#endif /* !def _XSLT_H */