24 Commits
v1.0 ... master

Author SHA1 Message Date
cf79834870 Pointer qualifier fix 2025-08-05 12:03:50 -04:00
4171c81bfd Bump version number 2025-08-05 11:39:41 -04:00
09b801c150 Fix missing #includes 2025-08-02 21:16:32 -04:00
1807044977 Account for different implementations of realpath(3) 2025-06-05 11:06:49 -04:00
David Baer
da8080a7de Bump version, ensure manpage comes along 2018-09-08 23:15:55 -04:00
David Baer
ddee2d2702 Remove (probably useless) typecast to avoid error message from GCC 2018-09-08 23:14:04 -04:00
David Baer
ec5275ab88 Fix segfault caused by apparently poor understanding of operator precedence 2018-09-08 22:17:32 -04:00
David Baer
3b94588d25 mimetype needs to come first, in opendocument spec 2018-09-08 22:16:22 -04:00
David Baer
321731f51f Install man page 2018-07-17 16:03:09 -04:00
David Baer
dad4175887 Bump minor version 2018-07-17 16:00:25 -04:00
David Baer
95834f00ff Enable line-breaking blockquotes (prefaced by |)
TODO: No indentation on first line
2018-07-17 15:56:58 -04:00
David Baer
44b2187d76 Make scanner portable (works with regular lex) 2018-07-03 09:56:28 -04:00
David Baer
c98e91d810 Add license and installation instructions 2017-12-20 10:18:12 -05:00
David Baer
5c693aa638 Bump version to 1.2 2017-08-09 22:31:37 -04:00
David Baer
517a9d9605 Clang compatibility 2017-08-09 22:27:29 -04:00
David Baer
9a084fe0bd Add some needed checks 2017-08-09 22:27:06 -04:00
David Baer
91e3daf3dc README.md edited online with Bitbucket 2017-01-23 05:18:11 +00:00
David Baer
9abc0f159c Mark version 1.1 2017-01-23 00:16:32 -05:00
David Baer
eb62727ff9 Make links live again 2017-01-23 00:14:41 -05:00
David Baer
549a6cbab7 Add BSD license info 2017-01-22 20:01:07 -05:00
David Baer
af79834d89 Change parens 2017-01-22 19:52:14 -05:00
David Baer
501d6c87c2 Remove unnecessary test code. 2016-07-07 16:39:09 -04:00
David Baer
cbd16b6ab1 Oops. Not sure how to install man pages.
I'll leave that for later.
2016-07-07 16:12:23 -04:00
David Baer
0f69fd7a8e Add man page 2016-07-07 14:41:28 -04:00
19 changed files with 284 additions and 41 deletions

2
.gitignore vendored
View File

@@ -2,6 +2,7 @@ Makefile
Makefile.in Makefile.in
aclocal.m4 aclocal.m4
autom4te.cache/ autom4te.cache/
compile
config.h config.h
config.h.in config.h.in
config.h.in~ config.h.in~
@@ -22,3 +23,4 @@ stamp-h1
ylwrap ylwrap
*.o *.o
*.core *.core
*.tar.gz

22
COPYING Normal file
View File

@@ -0,0 +1,22 @@
COPYRIGHT (C) 2015-2017 by David Baer <david@amyanddavid.net>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1
INSTALL Symbolic link
View File

@@ -0,0 +1 @@
/usr/local/share/automake-1.15/INSTALL

View File

@@ -1,2 +1,3 @@
SUBDIRS = src data SUBDIRS = src data
dist_doc_DATA = README.md dist_doc_DATA = README.md
dist_man1_MANS = sermon.1

View File

@@ -1,4 +1,4 @@
sermon 1.0 sermon 1.4
========== ==========
This utility converts text markup into various presentable forms. This utility converts text markup into various presentable forms.

View File

@@ -2,13 +2,13 @@
# Process this file with autoconf to produce a configure script. # Process this file with autoconf to produce a configure script.
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([sermon], [1.0], [david.a.baer@gmail.com]) AC_INIT([sermon], [1.5], [david.a.baer@gmail.com])
AM_INIT_AUTOMAKE([-Wall -Werror foreign]) AM_INIT_AUTOMAKE([-Wall -Werror foreign])
AC_CONFIG_SRCDIR([config.h.in]) AC_CONFIG_SRCDIR([config.h.in])
AC_CONFIG_HEADERS([config.h]) AC_CONFIG_HEADERS([config.h])
# Checks for programs. # Checks for programs.
AC_PROG_CC AC_PROG_CC_C99
AC_PROG_LEX AC_PROG_LEX
AC_PROG_YACC AC_PROG_YACC
@@ -24,16 +24,20 @@ AC_CHECK_HEADERS([inttypes.h libintl.h limits.h malloc.h stddef.h stdint.h stdli
AC_C_INLINE AC_C_INLINE
AC_TYPE_INT16_T AC_TYPE_INT16_T
AC_TYPE_INT32_T AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T AC_TYPE_INT8_T
AC_TYPE_PID_T
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
AC_TYPE_UINT16_T AC_TYPE_UINT16_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T AC_TYPE_UINT8_T
# Checks for library functions. # Checks for library functions.
AC_FUNC_FORK
AC_FUNC_MALLOC AC_FUNC_MALLOC
AC_FUNC_REALLOC AC_FUNC_REALLOC
AC_CHECK_FUNCS([memset pledge realpath strdup strndup]) AC_CHECK_FUNCS([localtime_r pledge memset realpath strcasecmp strdup strndup strrchr])
AC_CONFIG_FILES([Makefile AC_CONFIG_FILES([Makefile
data/Makefile data/Makefile

View File

@@ -78,7 +78,7 @@
</xsl:template> </xsl:template>
<xsl:template match="ser:p" mode="quote"> <xsl:template match="ser:p" mode="quote">
<text:p text:style-name="Quotations"><text:span text:style-name="T2"><xsl:apply-templates select="*|text()"/></text:span></text:p> <text:p text:style-name="Quotations"><xsl:apply-templates select="*|text()"/></text:p>
</xsl:template> </xsl:template>
<xsl:template match="ser:quote" mode="body"> <xsl:template match="ser:quote" mode="body">

48
sermon.1 Normal file
View File

@@ -0,0 +1,48 @@
.Dd $Mdocdate: July 7 2016$
.Dt SERMON 1
.Os
.Sh NAME
.Nm sermon
.Nd converts sermon text markup to a variety of formats
.Sh SYNOPSIS
.Nm sermon
.Op Fl hv
.Op Fl a Ar author
.Op Fl s Ar stylename
.Op Fl o Ar output
.Ar
.Sh DESCRIPTION
The
.Nm
utility converts sermon markup text into a variety of useful formats,
including
.Em HTML Ns
,
.Em ODT Ns
, and
.Em XSL-FO No Ns .
.\" .Sh CONTEXT
.\" For section 9 functions only.
.\" .Sh IMPLEMENTATION NOTES
.\" Not used in OpenBSD.
.\" .Sh RETURN VALUES
.\" For sections 2, 3, and 9 function return values only.
.\" .Sh ENVIRONMENT
.\" For sections 1, 6, 7, and 8 only.
.\" .Sh FILES
.\" .Sh EXIT STATUS
.\" For sections 1, 6, and 8 only.
.\" .Sh EXAMPLES
.\" .Sh DIAGNOSTICS
.\" For sections 1, 4, 6, 7, 8, and 9 printf/stderr messages only.
.\" .Sh ERRORS
.\" For sections 2, 3, 4, and 9 errno settings only.
.\" .Sh SEE ALSO
.\" .Xr foobar 1
.\" .Sh STANDARDS
.\" .Sh HISTORY
.\" .Sh AUTHORS
.\" .Sh CAVEATS
.\" .Sh BUGS
.\" .Sh SECURITY CONSIDERATIONS
.\" Not used in OpenBSD.

View File

@@ -27,6 +27,7 @@
* *
*/ */
#include <ctype.h>
#include <string.h> #include <string.h>
#include "queue.h" #include "queue.h"
#include "stack.h" #include "stack.h"
@@ -44,6 +45,8 @@ typedef enum {
TOK_UNICODE, TOK_UNICODE,
TOK_STAR, TOK_STAR,
TOK_REF, TOK_REF,
TOK_URL,
TOK_BREAK,
/* /*
TOK_DASH, TOK_DASH,
TOK_OPEN_DOUBLE_QUOTE, TOK_OPEN_DOUBLE_QUOTE,
@@ -68,22 +71,37 @@ freeTokenizer(utf8iterator* iter) {
utf8FreeIterator(iter); utf8FreeIterator(iter);
} }
inline int int
greekChar(uint32_t ch) { greekChar(uint32_t ch) {
return (((0x370 <= ch) && (ch <= 0x3ff)) || return (((0x370 <= ch) && (ch <= 0x3ff)) ||
((0x1f00 <= ch) && (ch <= 0x1fff))); ((0x1f00 <= ch) && (ch <= 0x1fff)));
} }
inline int int
extendedPunctuation(uint32_t ch) { extendedPunctuation(uint32_t ch) {
return ((0x2000 <= ch) && (ch <= 0x206f)); return ((0x2000 <= ch) && (ch <= 0x206f));
} }
inline int int
latinChar(uint32_t ch) { latinChar(uint32_t ch) {
return (ch <= 0xff) || extendedPunctuation(ch); return (ch <= 0xff) || extendedPunctuation(ch);
} }
/*
 * httpAt - report whether the text at the tokenizer's current byte
 * position starts with "http://" or "https://".
 *
 * The scheme letters (h, t, t, p and the optional s) are compared
 * case-insensitively; the "://" separator is compared exactly.  The
 * tokenizer is only read, never advanced.
 *
 * Returns non-zero when one of the two prefixes matches, 0 otherwise.
 *
 * Every byte is converted to unsigned char before it reaches tolower():
 * handing a negative value (other than EOF) to a <ctype.h> function is
 * undefined behavior, and non-ASCII bytes are negative wherever plain
 * char is signed.
 *
 * The && chain stops at the first mismatch, so a NUL byte ends the
 * comparison before anything past it is read (assumes txt is
 * NUL-terminated -- TODO confirm against the iterator's constructor).
 */
int
httpAt(Tokenizer tokenizer) {
    return ((tolower((unsigned char)tokenizer->txt[tokenizer->byteIndex]) == 'h') &&
            (tolower((unsigned char)tokenizer->txt[tokenizer->byteIndex + 1]) == 't') &&
            (tolower((unsigned char)tokenizer->txt[tokenizer->byteIndex + 2]) == 't') &&
            (tolower((unsigned char)tokenizer->txt[tokenizer->byteIndex + 3]) == 'p') &&
            /* "http" must be followed by either "://" ... */
            (((tokenizer->txt[tokenizer->byteIndex + 4] == ':') &&
              (tokenizer->txt[tokenizer->byteIndex + 5] == '/') &&
              (tokenizer->txt[tokenizer->byteIndex + 6] == '/')) ||
             /* ... or by "s://" (the 's' in any case). */
             ((tolower((unsigned char)tokenizer->txt[tokenizer->byteIndex + 4]) == 's') &&
              (tokenizer->txt[tokenizer->byteIndex + 5] == ':') &&
              (tokenizer->txt[tokenizer->byteIndex + 6] == '/') &&
              (tokenizer->txt[tokenizer->byteIndex + 7] == '/'))));
}
static Token static Token
nextToken(Tokenizer tokenizer) { nextToken(Tokenizer tokenizer) {
int startIndex = tokenizer->byteIndex; int startIndex = tokenizer->byteIndex;
@@ -99,6 +117,11 @@ nextToken(Tokenizer tokenizer) {
result.toktype = TOK_STAR; result.toktype = TOK_STAR;
result.toktext = NULL; result.toktext = NULL;
return result; return result;
} else if (ch == '\n') {
utf8Advance(tokenizer);
result.toktype = TOK_BREAK;
result.toktext = NULL;
return result;
} else if (greekChar(ch)) { } else if (greekChar(ch)) {
while ((ch != 0) && while ((ch != 0) &&
(greekChar(ch) || (ch == ' ') || (ch == ',') || (ch == '.'))) { (greekChar(ch) || (ch == ' ') || (ch == ',') || (ch == '.'))) {
@@ -125,12 +148,30 @@ nextToken(Tokenizer tokenizer) {
result.toktype = TOK_REF; result.toktype = TOK_REF;
result.toktext = strndup(tokenizer->txt + idStart, idEnd - idStart); result.toktext = strndup(tokenizer->txt + idStart, idEnd - idStart);
return result; return result;
} else if (httpAt(tokenizer)) {
int endIndex = 0;
while ((ch != 0) && (ch != ' ') && (ch != '\r') && (ch != '\n')) {
utf8Advance(tokenizer);
ch = utf8CharAt(tokenizer);
}
if (tokenizer->txt[tokenizer->byteIndex - 1] == '.') {
/* heuristic: url doesn't end in . */
endIndex = --tokenizer->byteIndex;
} else {
endIndex = tokenizer->byteIndex;
}
result.toktype = TOK_URL;
result.toktext = strndup(tokenizer->txt + startIndex, endIndex - startIndex);
return result;
} else if (latinChar(ch)) { } else if (latinChar(ch)) {
while ((ch != 0) && latinChar(ch) && (ch != '*')) { while ((ch != 0) && latinChar(ch) && (ch != '*') && (ch != '\n')) {
utf8Advance(tokenizer); utf8Advance(tokenizer);
ch = utf8CharAt(tokenizer); ch = utf8CharAt(tokenizer);
if (ch == '^') { if (ch == '^') {
if (tokenizer->txt[tokenizer->byteIndex + 1] == '{') break; if (tokenizer->txt[tokenizer->byteIndex + 1] == '{') break;
} else if (httpAt(tokenizer)) {
break;
} }
} }
result.toktype = TOK_TEXT; result.toktype = TOK_TEXT;
@@ -172,6 +213,10 @@ int formatText(const char* txt, FormatElement** dst, CitationRecordQueue* citati
REINIT_QUEUE(formatElementQ); REINIT_QUEUE(formatElementQ);
em = 1; em = 1;
} }
} else if (tok.toktype == TOK_BREAK) {
FormatElement elt = { .elementType = FORMAT_BR, .elementContentLength = 0,
.elementContent = { .textContent = NULL } };
APPEND_QUEUE(FormatElementQueue, formatElementQ, elt);
} else { } else {
FormatElementType t; FormatElementType t;
FormatElement elt = { .elementContent = { .textContent = tok.toktext } } ; FormatElement elt = { .elementContent = { .textContent = tok.toktext } } ;
@@ -181,6 +226,8 @@ int formatText(const char* txt, FormatElement** dst, CitationRecordQueue* citati
t = FORMAT_GREEK; t = FORMAT_GREEK;
} else if (tok.toktype == TOK_UNICODE) { } else if (tok.toktype == TOK_UNICODE) {
t = FORMAT_UNICODE; t = FORMAT_UNICODE;
} else if (tok.toktype == TOK_URL) {
t = FORMAT_URL;
} else if (tok.toktype == TOK_REF) { } else if (tok.toktype == TOK_REF) {
t = FORMAT_CITATION; t = FORMAT_CITATION;
if (citationQPtr && !lookupCitation(*citationQPtr, tok.toktext)) { if (citationQPtr && !lookupCitation(*citationQPtr, tok.toktext)) {
@@ -223,15 +270,3 @@ void freeFormatElementArray(FormatElement* a, int length) {
free(a); free(a);
} }
#ifdef FORMATTER_TEST
#include <stdio.h>
const char* str = "My name in Chinese is \xe7\x86\x8a\xe5\xa4\xa7\xe8\xa1\x9b, or *xiong da wei*. My favorite Greek passage is \xe1\xbc\x90\xce\xbd \xe1\xbc\x80\xcf\x81\xcf\x87\xe1\xbf\x87 \xe1\xbc\xa6\xce\xbd \xe1\xbd\x81 \xce\xbb\xe1\xbd\xb9\xce\xb3\xce\xbf\xcf\x82.^{cite}";
int
main() {
FormatElement* lst;
int l = formatText(str, &lst, NULL);
return 0;
}
#endif

View File

@@ -34,10 +34,12 @@
typedef enum { typedef enum {
FORMAT_TEXT, FORMAT_TEXT,
FORMAT_EM, FORMAT_EM,
FORMAT_BR,
FORMAT_STRONG, FORMAT_STRONG,
FORMAT_CITATION, FORMAT_CITATION,
FORMAT_GREEK, FORMAT_GREEK,
FORMAT_UNICODE FORMAT_UNICODE,
FORMAT_URL
} FormatElementType; } FormatElementType;
typedef struct FormatElement FormatElement; typedef struct FormatElement FormatElement;

View File

@@ -31,6 +31,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <unistd.h> #include <unistd.h>
#include <sys/wait.h>
#include "odt.h" #include "odt.h"
#include "options.h" #include "options.h"
@@ -50,7 +51,7 @@ extractTemplateDocument(const char* const templateFilename, const char* const te
pid_t child_pid; pid_t child_pid;
if ((child_pid = fork()) == 0) { if ((child_pid = fork()) == 0) {
/* you are the child */ /* you are the child */
char* const args[] = { "unzip", "-d", tempDir, templateFilename, NULL }; char* const args[] = { "unzip", "-d", (char* const)tempDir, (char* const)templateFilename, NULL };
freopen("/dev/null", "w", stdout); freopen("/dev/null", "w", stdout);
execvp("unzip", args); execvp("unzip", args);
perror("execvp"); perror("execvp");
@@ -72,9 +73,40 @@ createOutputDocument(const char* const outputFilename, const char* const tempDir
pid_t child_pid; pid_t child_pid;
if ((child_pid = fork()) == 0) { if ((child_pid = fork()) == 0) {
/* you are the child */ /* you are the child */
/* In some implementations, realpath will give an error
* if the file does not exist, so we need to run it on
* the output directory, not the as-yet nonexistent
* output filename. That's what all this (below) is for. */
char outputDir[FILENAME_MAX];
char outputBase[FILENAME_MAX];
char outputRealPath[FILENAME_MAX]; char outputRealPath[FILENAME_MAX];
char* const args[] = { "zip", "-r", outputRealPath, ".", NULL }; strncpy(outputDir, outputFilename, FILENAME_MAX);
realpath(outputFilename, outputRealPath); char *ptr = strrchr(outputDir, '/');
if (ptr) {
strncpy(outputBase, ptr + 1, FILENAME_MAX);
*(ptr + 1) = '\0';
} else {
strncpy(outputDir, ".", FILENAME_MAX);
strncpy(outputBase, outputFilename, FILENAME_MAX);
}
if (realpath(outputDir, outputRealPath) == NULL) {
/* hopefully this doesn't run, but it will give good info
* if it does */
perror("realpath");
fprintf(stderr, "outputFilename was \"%s\"\n", outputFilename);
char curdir[FILENAME_MAX];
getcwd(curdir, FILENAME_MAX);
fprintf(stderr, "curdir was \"%s\"\n", curdir);
exit(1);
}
/* Then we append the output filename. */
strncat(outputRealPath, "/", FILENAME_MAX);
strncat(outputRealPath, outputBase, FILENAME_MAX);
char* const args[] = { "zip", "-r", outputRealPath, "mimetype", ".", NULL };
chdir(tempDir); chdir(tempDir);
freopen("/dev/null", "w", stdout); freopen("/dev/null", "w", stdout);
execvp("zip", args); execvp("zip", args);
@@ -97,7 +129,7 @@ removeDirectory(const char* const tempDir) {
pid_t child_pid; pid_t child_pid;
if ((child_pid = fork()) == 0) { if ((child_pid = fork()) == 0) {
/* you are the child */ /* you are the child */
char* const args[] = { "rm", "-rf", tempDir, NULL }; char* const args[] = { "rm", "-rf", (char* const)tempDir, NULL };
freopen("/dev/null", "w", stdout); freopen("/dev/null", "w", stdout);
execvp("rm", args); execvp("rm", args);
perror("execvp"); perror("execvp");

View File

@@ -69,7 +69,7 @@ typedef struct { \
} N } N
#define NEW_QUEUE(T, N) \ #define NEW_QUEUE(T, N) \
T N = (T) { .length = 0, .head = NULL, .tail = NULL } T N = { .length = 0, .head = NULL, .tail = NULL }
/* WARNING: this is probably not what you want -- see DESTROY_QUEUE below */ /* WARNING: this is probably not what you want -- see DESTROY_QUEUE below */
#define REINIT_QUEUE(N) { \ #define REINIT_QUEUE(N) { \

View File

@@ -38,7 +38,8 @@ typedef struct {
typedef enum { typedef enum {
PARA_DEFAULT, PARA_DEFAULT,
PARA_BLOCKQUOTE PARA_BLOCKQUOTE,
PARA_BLOCKPRESERVE
} SermonParagraphType; } SermonParagraphType;
typedef struct { typedef struct {

View File

@@ -9,6 +9,7 @@
#define LEXPRINT(x...) #define LEXPRINT(x...)
#endif #endif
%} %}
%option noyywrap
%s HEADER HEADERVAL REFERENCEINTRO REFERENCENAME REFERENCE BLOCK %s HEADER HEADERVAL REFERENCEINTRO REFERENCENAME REFERENCE BLOCK
WHITESPACE [ \t] WHITESPACE [ \t]
ID [A-Za-z_][A-Za-z_0-9]* ID [A-Za-z_][A-Za-z_0-9]*
@@ -23,14 +24,23 @@ ID [A-Za-z_][A-Za-z_0-9]*
<REFERENCEINTRO>ref { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + 2; return KW_REF; } <REFERENCEINTRO>ref { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + 2; return KW_REF; }
<REFERENCEINTRO>: { BEGIN(REFERENCENAME); yylloc.first_column = ++yylloc.last_column; return ':'; } <REFERENCEINTRO>: { BEGIN(REFERENCENAME); yylloc.first_column = ++yylloc.last_column; return ':'; }
<REFERENCENAME>{ID} { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return ID; } <REFERENCENAME>{ID} { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return ID; }
<REFERENCENAME>: { BEGIN(REFERENCE); yylloc.first_column = ++yylloc.last_column; return ':'; } <REFERENCENAME>: { BEGIN(REFERENCE); yylloc.first_column = ++yylloc.last_column; return ':'; }
<REFERENCE>[^}]* { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return REFTEXT; } <REFERENCE>[^}]* { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return REFTEXT; }
<REFERENCE>[}] { BEGIN(INITIAL); yylloc.first_column = ++yylloc.last_column; return '}'; } <REFERENCE>[}] { BEGIN(INITIAL); yylloc.first_column = ++yylloc.last_column; return '}'; }
^[^[{>\n].* { yylloc.first_column = ++yylloc.last_column; yylval.sval = strdup(yytext); LEXPRINT("LINE: %s\n", yytext); return LINE; } ^[^[{>|\n].* { yylloc.first_column = ++yylloc.last_column; yylval.sval = strdup(yytext); LEXPRINT("LINE: %s\n", yytext); return LINE; }
^>{WHITESPACE}*\n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; } ^[>|]{WHITESPACE}*\n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; }
^>{WHITESPACE}* { BEGIN(BLOCK); yylloc.first_column = yylloc.last_column = 1; return '>'; } ^[>|]{WHITESPACE}* {
<BLOCK>..* { BEGIN(INITIAL); yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) - 1; yylval.sval = strdup(yytext); return LINE; } BEGIN(BLOCK);
yylloc.first_column = yylloc.last_column = 1;
return yytext[0];
}
<BLOCK>..* {
BEGIN(INITIAL);
yylloc.first_column = yylloc.last_column + 1;
yylloc.last_column = yylloc.first_column + strlen(yytext) - 1;
yylval.sval = strdup(yytext); return LINE;
}
<INITIAL>\n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; } <INITIAL>\n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; }
<<EOF>> { return 0; } <<EOF>> { return 0; }
%% %%

View File

@@ -37,7 +37,13 @@ char* lineQueueToString(LineQueue* lq) {
FOREACH_QUEUE(LineQueue, *lq, ptr) { FOREACH_QUEUE(LineQueue, *lq, ptr) {
strncat(dest + idx, ptr->data, paraLength - idx); strncat(dest + idx, ptr->data, paraLength - idx);
idx += strlen(ptr->data); idx += strlen(ptr->data);
if (ptr->next != NULL) dest[idx++] = ' '; if (ptr->next != NULL) {
if (dest[idx-1] != '\n')
dest[idx++] = ' ';
} else {
if (dest[idx-1] == '\n')
idx--;
}
free(ptr->data); free(ptr->data);
} }
FOREACH_QUEUE_END FOREACH_QUEUE_END
@@ -45,6 +51,14 @@ char* lineQueueToString(LineQueue* lq) {
return dest; return dest;
} }
/*
 * lineWithBreak - return a newly allocated copy of txt with a single
 * '\n' appended.
 *
 * txt: NUL-terminated input line; it is not modified.
 *
 * Returns a heap string of strlen(txt) + 1 characters plus terminator.
 * The caller owns the result and must free() it.
 *
 * Allocation failure is treated as fatal: the function aborts rather
 * than returning NULL, so it never hands back a pointer that has to be
 * checked before use.
 */
char *lineWithBreak(const char *txt) {
    size_t l = strlen(txt);
    char *s = malloc(l + 2);       /* text + '\n' + terminating NUL */
    if (s == NULL) {
        abort();
    }
    memcpy(s, txt, l);
    s[l] = '\n';
    s[l + 1] = '\0';               /* terminate explicitly */
    return s;
}
void yyerror(Sermon*, const char*); void yyerror(Sermon*, const char*);
%} %}
@@ -126,7 +140,7 @@ block:
PARSEPRINT("Parsed paragraph:\n%s\n\n", p.paraText); PARSEPRINT("Parsed paragraph:\n%s\n\n", p.paraText);
free(paraText); free(paraText);
} }
| blockquote { | blockquote_or_preserve {
SermonParagraph p = { .paraType = PARA_BLOCKQUOTE }; SermonParagraph p = { .paraType = PARA_BLOCKQUOTE };
char* paraText = lineQueueToString(&lineQ); char* paraText = lineQueueToString(&lineQ);
FormatElement* paraContent = NULL; FormatElement* paraContent = NULL;
@@ -142,10 +156,17 @@ para:
para LINE '\n' { APPEND_QUEUE(LineQueue, lineQ, $2); } para LINE '\n' { APPEND_QUEUE(LineQueue, lineQ, $2); }
| LINE '\n' { DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, $1); } | LINE '\n' { DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, $1); }
; ;
blockquote_or_preserve:
blockquote
| blockpreserve
;
blockquote: blockquote:
blockquote '>' LINE '\n' { APPEND_QUEUE(LineQueue, lineQ, $3); } blockquote '>' LINE '\n' { APPEND_QUEUE(LineQueue, lineQ, $3); }
| '>' LINE '\n' { DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, $2); } | '>' LINE '\n' { DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, $2); }
; ;
blockpreserve:
blockpreserve '|' LINE '\n' { char* s = lineWithBreak($3); APPEND_QUEUE(LineQueue, lineQ, s); free($3); }
| '|' LINE '\n' { char* s = lineWithBreak($2); DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, s); free($2); }
references: references:
references reference break references reference break
| /* empty */ | /* empty */

View File

@@ -73,7 +73,7 @@ uint32_t utf8CharAt(const utf8iterator* iter) {
((uint32_t)(iter->txt[byteIndex] & 0x03) << 24); ((uint32_t)(iter->txt[byteIndex] & 0x03) << 24);
} else if (((iter->txt[byteIndex] & 0xf7) == 0xfc) && } else if (((iter->txt[byteIndex] & 0xf7) == 0xfc) &&
((iter->txt[byteIndex + 1]) & 0xc0 == 0x80) && ((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) && ((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
((iter->txt[byteIndex + 3] & 0xc0) == 0x80) && ((iter->txt[byteIndex + 3] & 0xc0) == 0x80) &&
((iter->txt[byteIndex + 4] & 0xc0) == 0x80) && ((iter->txt[byteIndex + 4] & 0xc0) == 0x80) &&
@@ -112,7 +112,7 @@ _next_offset(const utf8iterator* iter) {
((iter->txt[byteIndex + 4] & 0xc0) == 0x80)) { ((iter->txt[byteIndex + 4] & 0xc0) == 0x80)) {
return 5; return 5;
} else if (((iter->txt[byteIndex] & 0xf7) == 0xfc) && } else if (((iter->txt[byteIndex] & 0xf7) == 0xfc) &&
((iter->txt[byteIndex + 1]) & 0xc0 == 0x80) && ((iter->txt[byteIndex + 1] & 0xc0) == 0x80) &&
((iter->txt[byteIndex + 2] & 0xc0) == 0x80) && ((iter->txt[byteIndex + 2] & 0xc0) == 0x80) &&
((iter->txt[byteIndex + 3] & 0xc0) == 0x80) && ((iter->txt[byteIndex + 3] & 0xc0) == 0x80) &&
((iter->txt[byteIndex + 4] & 0xc0) == 0x80) && ((iter->txt[byteIndex + 4] & 0xc0) == 0x80) &&

View File

@@ -1,3 +1,31 @@
/*
* xml.c - Create XML representation of sermon document
* Copyright © 2017 David A. Baer
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the organization nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY David A. Baer ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL David A. Baer BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <libxml/tree.h> #include <libxml/tree.h>
#include <string.h> #include <string.h>
#include "options.h" #include "options.h"
@@ -55,6 +83,8 @@ formatElementsToXML(
xmlNodePtr em = xmlNewNode(sermon_ns, "em"); xmlNodePtr em = xmlNewNode(sermon_ns, "em");
formatElementsToXML(sermon_ns, em, a[i].elementContent.nestedContent, a[i].elementContentLength, numReferences, sermonReferencesPtr); formatElementsToXML(sermon_ns, em, a[i].elementContent.nestedContent, a[i].elementContentLength, numReferences, sermonReferencesPtr);
xmlAddChild(parentElement, em); xmlAddChild(parentElement, em);
} else if (a[i].elementType == FORMAT_BR) {
xmlAddChild(parentElement, xmlNewNode(sermon_ns, "br"));
} else if (a[i].elementType == FORMAT_TEXT) { } else if (a[i].elementType == FORMAT_TEXT) {
xmlAddChild(parentElement, xmlNewText(a[i].elementContent.textContent)); xmlAddChild(parentElement, xmlNewText(a[i].elementContent.textContent));
} else if (a[i].elementType == FORMAT_GREEK) { } else if (a[i].elementType == FORMAT_GREEK) {
@@ -65,6 +95,11 @@ formatElementsToXML(
xmlNodePtr unicode = xmlNewNode(sermon_ns, "unicode"); xmlNodePtr unicode = xmlNewNode(sermon_ns, "unicode");
xmlAddChild(unicode, xmlNewText(a[i].elementContent.textContent)); xmlAddChild(unicode, xmlNewText(a[i].elementContent.textContent));
xmlAddChild(parentElement, unicode); xmlAddChild(parentElement, unicode);
} else if (a[i].elementType == FORMAT_URL) {
xmlNodePtr link = xmlNewNode(sermon_ns, "link");
xmlSetProp(link, "href", a[i].elementContent.textContent);
xmlAddChild(link, xmlNewText(a[i].elementContent.textContent));
xmlAddChild(parentElement, link);
} else if (a[i].elementType == FORMAT_CITATION) { } else if (a[i].elementType == FORMAT_CITATION) {
xmlNodePtr cite = xmlNewNode(sermon_ns, "cite"); xmlNodePtr cite = xmlNewNode(sermon_ns, "cite");
int num = findReferenceNumber(numReferences, sermonReferencesPtr, a[i].elementContent.textContent); int num = findReferenceNumber(numReferences, sermonReferencesPtr, a[i].elementContent.textContent);

View File

@@ -1,3 +1,31 @@
/*
* xml.h - create XML representation of sermon document
* Copyright © 2017 David A. Baer
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the organization nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY David A. Baer ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL David A. Baer BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _XML_H #ifndef _XML_H
#define _XML_H #define _XML_H

View File

@@ -1,4 +1,5 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <libxml/tree.h> #include <libxml/tree.h>
#include <libxslt/xslt.h> #include <libxslt/xslt.h>