From 95834f00ff9d6d10a09bc08b0e54548268f15d63 Mon Sep 17 00:00:00 2001 From: David Baer Date: Tue, 17 Jul 2018 15:56:58 -0400 Subject: [PATCH] Enable line-breaking blockquotes (prefaced by |) TODO: No indentation on first line --- data/ms_odt.xsl | 2 +- src/format.c | 12 +++++++++++- src/format.h | 1 + src/sermon.h | 3 ++- src/sermon_lexer.l | 25 +++++++++++++++++-------- src/sermon_parser.y | 25 +++++++++++++++++++++++-- src/xml.c | 2 ++ 7 files changed, 57 insertions(+), 13 deletions(-) diff --git a/data/ms_odt.xsl b/data/ms_odt.xsl index 19d3fdb..f64fb92 100644 --- a/data/ms_odt.xsl +++ b/data/ms_odt.xsl @@ -78,7 +78,7 @@ - + diff --git a/src/format.c b/src/format.c index 0ffd8b0..117f676 100644 --- a/src/format.c +++ b/src/format.c @@ -45,6 +45,7 @@ typedef enum { TOK_STAR, TOK_REF, TOK_URL, + TOK_BREAK, /* TOK_DASH, TOK_OPEN_DOUBLE_QUOTE, @@ -115,6 +116,11 @@ nextToken(Tokenizer tokenizer) { result.toktype = TOK_STAR; result.toktext = NULL; return result; + } else if (ch == '\n') { + utf8Advance(tokenizer); + result.toktype = TOK_BREAK; + result.toktext = NULL; + return result; } else if (greekChar(ch)) { while ((ch != 0) && (greekChar(ch) || (ch == ' ') || (ch == ',') || (ch == '.'))) { @@ -158,7 +164,7 @@ nextToken(Tokenizer tokenizer) { result.toktext = strndup(tokenizer->txt + startIndex, endIndex - startIndex); return result; } else if (latinChar(ch)) { - while ((ch != 0) && latinChar(ch) && (ch != '*')) { + while ((ch != 0) && latinChar(ch) && (ch != '*') && (ch != '\n')) { utf8Advance(tokenizer); ch = utf8CharAt(tokenizer); if (ch == '^') { @@ -206,6 +212,10 @@ int formatText(const char* txt, FormatElement** dst, CitationRecordQueue* citati REINIT_QUEUE(formatElementQ); em = 1; } + } else if (tok.toktype == TOK_BREAK) { + FormatElement elt = { .elementType = FORMAT_BR, .elementContentLength = 0, + .elementContent = { .textContent = NULL } }; + APPEND_QUEUE(FormatElementQueue, formatElementQ, elt); } else { FormatElementType t; FormatElement elt = { .elementContent = { .textContent = tok.toktext } } ; diff --git a/src/format.h b/src/format.h index ac48e96..f72d4e6 100644 --- a/src/format.h +++ b/src/format.h @@ -34,6 +34,7 @@ typedef enum { FORMAT_TEXT, FORMAT_EM, + FORMAT_BR, FORMAT_STRONG, FORMAT_CITATION, FORMAT_GREEK, diff --git a/src/sermon.h b/src/sermon.h index a70d400..d7717c0 100644 --- a/src/sermon.h +++ b/src/sermon.h @@ -38,7 +38,8 @@ typedef struct { typedef enum { PARA_DEFAULT, - PARA_BLOCKQUOTE + PARA_BLOCKQUOTE, + PARA_BLOCKPRESERVE } SermonParagraphType; typedef struct { diff --git a/src/sermon_lexer.l b/src/sermon_lexer.l index 76554ca..d53be14 100644 --- a/src/sermon_lexer.l +++ b/src/sermon_lexer.l @@ -24,14 +24,23 @@ ID [A-Za-z_][A-Za-z_0-9]* ref { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + 2; return KW_REF; } : { BEGIN(REFERENCENAME); yylloc.first_column = ++yylloc.last_column; return ':'; } {ID} { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return ID; } -: { BEGIN(REFERENCE); yylloc.first_column = ++yylloc.last_column; return ':'; } -[^}]* { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return REFTEXT; } -[}] { BEGIN(INITIAL); yylloc.first_column = ++yylloc.last_column; return '}'; } -^[^[{>\n].* { yylloc.first_column = ++yylloc.last_column; yylval.sval = strdup(yytext); LEXPRINT("LINE: %s\n", yytext); return LINE; } -^>{WHITESPACE}*\n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; } -^>{WHITESPACE}* { BEGIN(BLOCK); yylloc.first_column = yylloc.last_column = 1; return '>'; } -..* { BEGIN(INITIAL); yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) - 1; yylval.sval = strdup(yytext); return LINE; } +: { BEGIN(REFERENCE); yylloc.first_column = ++yylloc.last_column; return ':'; } +[^}]* { yylloc.first_column = yylloc.last_column + 1; yylloc.last_column = yylloc.first_column + strlen(yytext) + 1; yylval.sval = strdup(yytext); return REFTEXT; } +[}] { BEGIN(INITIAL); yylloc.first_column = ++yylloc.last_column; return '}'; } +^[^[{>|\n].* { yylloc.first_column = ++yylloc.last_column; yylval.sval = strdup(yytext); LEXPRINT("LINE: %s\n", yytext); return LINE; } +^[>|]{WHITESPACE}*\n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; } +^[>|]{WHITESPACE}* { + BEGIN(BLOCK); + yylloc.first_column = yylloc.last_column = 1; + return yytext[0]; + } +..* { + BEGIN(INITIAL); + yylloc.first_column = yylloc.last_column + 1; + yylloc.last_column = yylloc.first_column + strlen(yytext) - 1; + yylval.sval = strdup(yytext); return LINE; + } \n { yylloc.first_column = yylloc.last_column = 0; yylloc.first_line++; yylloc.last_line++; return '\n'; } -<> { return 0; } +<> { return 0; } %% diff --git a/src/sermon_parser.y b/src/sermon_parser.y index bf95352..c07e0ce 100644 --- a/src/sermon_parser.y +++ b/src/sermon_parser.y @@ -37,7 +37,13 @@ char* lineQueueToString(LineQueue* lq) { FOREACH_QUEUE(LineQueue, *lq, ptr) { strncat(dest + idx, ptr->data, paraLength - idx); idx += strlen(ptr->data); - if (ptr->next != NULL) dest[idx++] = ' '; + if (ptr->next != NULL) { + if (dest[idx-1] != '\n') + dest[idx++] = ' '; + } else { + if (dest[idx-1] == '\n') + idx--; + } free(ptr->data); } FOREACH_QUEUE_END @@ -45,6 +51,14 @@ char* lineQueueToString(LineQueue* lq) { return dest; } +char *lineWithBreak(const char *txt) { + size_t l = strlen(txt); + char* s = (char*)malloc(l+2); + strncpy(s, txt, l + 2); + s[l] = '\n'; + return s; +} + void yyerror(Sermon*, const char*); %} @@ -126,7 +140,7 @@ block: PARSEPRINT("Parsed paragraph:\n%s\n\n", p.paraText); free(paraText); } - | blockquote { + | blockquote_or_preserve { SermonParagraph p = { .paraType = PARA_BLOCKQUOTE }; char* paraText = lineQueueToString(&lineQ); FormatElement* paraContent = NULL; @@ -142,10 +156,17 @@ para: para LINE '\n' { APPEND_QUEUE(LineQueue, lineQ, $2); } | LINE '\n' { DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, $1); } ; +blockquote_or_preserve: + blockquote + | blockpreserve + ; blockquote: blockquote '>' LINE '\n' { APPEND_QUEUE(LineQueue, lineQ, $3); } | '>' LINE '\n' { DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, $2); } ; +blockpreserve: + blockpreserve '|' LINE '\n' { char* s = lineWithBreak($3); APPEND_QUEUE(LineQueue, lineQ, s); free($3); } + | '|' LINE '\n' { char* s = lineWithBreak($2); DESTROY_QUEUE(LineQueue, lineQ); APPEND_QUEUE(LineQueue, lineQ, s); free($2); } references: references reference break | /* empty */ diff --git a/src/xml.c b/src/xml.c index 6184052..72a6a06 100644 --- a/src/xml.c +++ b/src/xml.c @@ -83,6 +83,8 @@ formatElementsToXML( xmlNodePtr em = xmlNewNode(sermon_ns, "em"); formatElementsToXML(sermon_ns, em, a[i].elementContent.nestedContent, a[i].elementContentLength, numReferences, sermonReferencesPtr); xmlAddChild(parentElement, em); + } else if (a[i].elementType == FORMAT_BR) { + xmlAddChild(parentElement, xmlNewNode(sermon_ns, "br")); } else if (a[i].elementType == FORMAT_TEXT) { xmlAddChild(parentElement, xmlNewText(a[i].elementContent.textContent)); } else if (a[i].elementType == FORMAT_GREEK) {