Reworked formatting code
Implemented paragraph formatting Fixed bug with newlines in pre tag
This commit is contained in:
parent
2015aeef89
commit
253ca75822
1 changed files with 132 additions and 75 deletions
|
@ -278,49 +278,53 @@ public final class HtmlEncoder {
|
||||||
// conversion around them to look good. However, they differ
|
// conversion around them to look good. However, they differ
|
||||||
// in how many newlines around them should ignored. These sets
|
// in how many newlines around them should ignored. These sets
|
||||||
// help to treat each tag right in newline conversion.
|
// help to treat each tag right in newline conversion.
|
||||||
static final HashSet swallowAll = new HashSet();
|
static final HashSet internalTags = new HashSet();
|
||||||
static final HashSet swallowTwo = new HashSet();
|
static final HashSet blockTags = new HashSet();
|
||||||
static final HashSet swallowOne = new HashSet();
|
static final HashSet semiBlockTags = new HashSet();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
// actual block level elements
|
// actual block level elements
|
||||||
swallowOne.add("address");
|
semiBlockTags.add("address");
|
||||||
swallowTwo.add("blockquote");
|
semiBlockTags.add("dir");
|
||||||
swallowTwo.add("center");
|
semiBlockTags.add("div");
|
||||||
swallowOne.add("dir");
|
semiBlockTags.add("table");
|
||||||
swallowOne.add("div");
|
|
||||||
swallowTwo.add("dl");
|
blockTags.add("blockquote");
|
||||||
swallowTwo.add("fieldset");
|
blockTags.add("center");
|
||||||
swallowTwo.add("form");
|
blockTags.add("dl");
|
||||||
swallowTwo.add("h1");
|
blockTags.add("fieldset");
|
||||||
swallowTwo.add("h2");
|
blockTags.add("form");
|
||||||
swallowTwo.add("h3");
|
blockTags.add("h1");
|
||||||
swallowTwo.add("h4");
|
blockTags.add("h2");
|
||||||
swallowTwo.add("h5");
|
blockTags.add("h3");
|
||||||
swallowTwo.add("h6");
|
blockTags.add("h4");
|
||||||
swallowTwo.add("hr");
|
blockTags.add("h5");
|
||||||
swallowTwo.add("isindex");
|
blockTags.add("h6");
|
||||||
swallowAll.add("menu");
|
blockTags.add("hr");
|
||||||
swallowAll.add("noframes");
|
blockTags.add("isindex");
|
||||||
swallowAll.add("noscript");
|
blockTags.add("ol");
|
||||||
swallowTwo.add("ol");
|
blockTags.add("p");
|
||||||
swallowTwo.add("p");
|
blockTags.add("pre");
|
||||||
swallowTwo.add("pre");
|
blockTags.add("ul");
|
||||||
swallowOne.add("table");
|
|
||||||
swallowTwo.add("ul");
|
internalTags.add("menu");
|
||||||
|
internalTags.add("noframes");
|
||||||
|
internalTags.add("noscript");
|
||||||
|
|
||||||
/// to be treated as block level elements
|
/// to be treated as block level elements
|
||||||
swallowTwo.add("br");
|
semiBlockTags.add("th");
|
||||||
swallowTwo.add("dd");
|
|
||||||
swallowTwo.add("dt");
|
blockTags.add("br");
|
||||||
swallowTwo.add("frameset");
|
blockTags.add("dd");
|
||||||
swallowTwo.add("li");
|
blockTags.add("dt");
|
||||||
swallowAll.add("tbody");
|
blockTags.add("frameset");
|
||||||
swallowTwo.add("td");
|
blockTags.add("li");
|
||||||
swallowAll.add("tfoot");
|
blockTags.add("td");
|
||||||
swallowOne.add("th");
|
|
||||||
swallowAll.add("thead");
|
internalTags.add("tbody");
|
||||||
swallowAll.add("tr");
|
internalTags.add("tfoot");
|
||||||
|
internalTags.add("thead");
|
||||||
|
internalTags.add("tr");
|
||||||
}
|
}
|
||||||
|
|
||||||
// set of tags that are always empty
|
// set of tags that are always empty
|
||||||
|
@ -347,8 +351,12 @@ public final class HtmlEncoder {
|
||||||
static final byte TAG_ATT_NAME = 2;
|
static final byte TAG_ATT_NAME = 2;
|
||||||
static final byte TAG_ATT_VAL = 3;
|
static final byte TAG_ATT_VAL = 3;
|
||||||
|
|
||||||
static final String newLine = System.getProperty("line.separator");
|
static final byte TEXT = 0;
|
||||||
|
static final byte SEMIBLOCK = 1;
|
||||||
|
static final byte BLOCK = 2;
|
||||||
|
static final byte INTERNAL = 3;
|
||||||
|
|
||||||
|
static final String newLine = System.getProperty("line.separator");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Do "smart" encodging on a string. This means that valid HTML entities and tags,
|
* Do "smart" encodging on a string. This means that valid HTML entities and tags,
|
||||||
|
@ -369,7 +377,7 @@ public final class HtmlEncoder {
|
||||||
// try to make stringbuffer large enough from the start
|
// try to make stringbuffer large enough from the start
|
||||||
StringBuffer ret = new StringBuffer(Math.round(l * 1.4f));
|
StringBuffer ret = new StringBuffer(Math.round(l * 1.4f));
|
||||||
|
|
||||||
encode(str, ret, null);
|
encode(str, ret, false, null);
|
||||||
|
|
||||||
return ret.toString();
|
return ret.toString();
|
||||||
}
|
}
|
||||||
|
@ -380,21 +388,39 @@ public final class HtmlEncoder {
|
||||||
* other occurrences of '<', '>' and '&' are encoded to HTML entities.
|
* other occurrences of '<', '>' and '&' are encoded to HTML entities.
|
||||||
*/
|
*/
|
||||||
public final static void encode(String str, StringBuffer ret) {
|
public final static void encode(String str, StringBuffer ret) {
|
||||||
encode(str, ret, null);
|
encode(str, ret, false, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Do "smart" encodging on a string. This means that valid HTML entities and tags,
|
* Do "smart" encodging on a string. This means that valid HTML entities and tags,
|
||||||
* Helma macros and HTML comments are passed through unescaped, while
|
* Helma macros and HTML comments are passed through unescaped, while
|
||||||
* other occurrences of '<', '>' and '&' are encoded to HTML entities.
|
* other occurrences of '<', '>' and '&' are encoded to HTML entities.
|
||||||
|
*
|
||||||
|
* @param str the string to encode
|
||||||
|
* @param ret the string buffer to encode to
|
||||||
|
* @param paragraphs if true use p tags for paragraphs, otherwise just use br's
|
||||||
|
* @param allowedTags a set containing the names of allowed tags as strings. All other
|
||||||
|
* tags will be escaped
|
||||||
*/
|
*/
|
||||||
public final static void encode(String str, StringBuffer ret, Set allowedTags) {
|
public final static void encode(String str, StringBuffer ret,
|
||||||
|
boolean paragraphs, Set allowedTags) {
|
||||||
if (str == null) {
|
if (str == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int l = str.length();
|
int l = str.length();
|
||||||
|
|
||||||
|
// where to insert the <p> tag in case we want to create a paragraph later on
|
||||||
|
int paragraphStart = ret.length();
|
||||||
|
|
||||||
|
// what kind of element/text are we leaving and entering?
|
||||||
|
// this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL
|
||||||
|
// depending on this information, we decide whether and how to insert
|
||||||
|
// paragraphs and line breaks. "entering" a tag means we're at the '<'
|
||||||
|
// and exiting means we're at the '>', not that it's a start or close tag.
|
||||||
|
byte entering = TEXT;
|
||||||
|
byte exiting = TEXT;
|
||||||
|
|
||||||
Stack openTags = new Stack();
|
Stack openTags = new Stack();
|
||||||
|
|
||||||
// are we currently within a < and a > that consitute some kind of tag?
|
// are we currently within a < and a > that consitute some kind of tag?
|
||||||
|
@ -424,15 +450,14 @@ public final class HtmlEncoder {
|
||||||
char htmlQuoteChar = '\u0000';
|
char htmlQuoteChar = '\u0000';
|
||||||
char macroQuoteChar = '\u0000';
|
char macroQuoteChar = '\u0000';
|
||||||
|
|
||||||
// number of newlines to ignore in \n -> <br> conversion
|
|
||||||
int swallowLinebreaks = 0;
|
|
||||||
|
|
||||||
// number of newlines met since the last non-whitespace character
|
// number of newlines met since the last non-whitespace character
|
||||||
int linebreaks = 0;
|
int linebreaks = 0;
|
||||||
|
|
||||||
// did we meet a backslash escape?
|
// did we meet a backslash escape?
|
||||||
boolean escape = false;
|
boolean escape = false;
|
||||||
|
|
||||||
|
boolean triggerBreak = false;
|
||||||
|
|
||||||
for (int i = 0; i < l; i++) {
|
for (int i = 0; i < l; i++) {
|
||||||
char c = str.charAt(i);
|
char c = str.charAt(i);
|
||||||
|
|
||||||
|
@ -475,20 +500,19 @@ public final class HtmlEncoder {
|
||||||
htmlQuoteChar = '\u0000';
|
htmlQuoteChar = '\u0000';
|
||||||
htmlTagMode = TAG_NAME;
|
htmlTagMode = TAG_NAME;
|
||||||
|
|
||||||
// set ignoreNewline on some tags, depending on wheather they're
|
exiting = entering;
|
||||||
// being opened or closed.
|
entering = TEXT;
|
||||||
// what's going on here? we switch newline encoding on inside some tags, for
|
|
||||||
// others we switch it on when they're closed
|
|
||||||
linebreaks = Math.max(linebreaks - swallowLinebreaks, 0);
|
|
||||||
|
|
||||||
if (swallowAll.contains(tagName)) {
|
if (internalTags.contains(tagName)) {
|
||||||
swallowLinebreaks = 1000;
|
entering = INTERNAL;
|
||||||
} else if (swallowTwo.contains(tagName)) {
|
} else if (blockTags.contains(tagName)) {
|
||||||
swallowLinebreaks = 2;
|
entering = BLOCK;
|
||||||
} else if (swallowOne.contains(tagName)) {
|
} else if (semiBlockTags.contains(tagName)) {
|
||||||
swallowLinebreaks = 1;
|
entering = paragraphs ? BLOCK : SEMIBLOCK;
|
||||||
} else {
|
}
|
||||||
swallowLinebreaks = 0;
|
|
||||||
|
if (entering > 0) {
|
||||||
|
triggerBreak = !insidePreTag;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (insideCloseTag) {
|
if (insideCloseTag) {
|
||||||
|
@ -513,7 +537,6 @@ public final class HtmlEncoder {
|
||||||
openTags.pop();
|
openTags.pop();
|
||||||
} else {
|
} else {
|
||||||
openTags.push(tagName);
|
openTags.push(tagName);
|
||||||
swallowLinebreaks = Math.max(swallowLinebreaks - 1, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ("code".equals(tagName) && !insideCloseTag) {
|
if ("code".equals(tagName) && !insideCloseTag) {
|
||||||
|
@ -526,25 +549,44 @@ public final class HtmlEncoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} // if (i < l-2)
|
||||||
// if (i < l-2)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((linebreaks > 0 || swallowLinebreaks > 0) && !Character.isWhitespace(c)) {
|
if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) {
|
||||||
if (!insidePreTag) {
|
|
||||||
for (int k = 0; k < linebreaks; k++) {
|
if (!insideTag) {
|
||||||
if (k >= swallowLinebreaks) {
|
exiting = entering;
|
||||||
|
entering = TEXT;
|
||||||
|
if (exiting >= SEMIBLOCK) {
|
||||||
|
paragraphStart = ret.length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entering != INTERNAL && exiting != INTERNAL) {
|
||||||
|
int swallowBreaks = 0;
|
||||||
|
if (paragraphs && (entering != BLOCK || exiting != BLOCK) &&
|
||||||
|
(exiting < BLOCK) &&
|
||||||
|
(entering >= SEMIBLOCK || linebreaks > 1) &&
|
||||||
|
paragraphStart < ret.length()) {
|
||||||
|
ret.insert(paragraphStart, "<p>");
|
||||||
|
ret.append("</p>");
|
||||||
|
swallowBreaks = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int k = linebreaks-1; k>=0; k--) {
|
||||||
|
if (k >= swallowBreaks && k >= entering && k >= exiting) {
|
||||||
ret.append("<br />");
|
ret.append("<br />");
|
||||||
}
|
}
|
||||||
ret.append(newLine);
|
ret.append(newLine);
|
||||||
}
|
}
|
||||||
}
|
if (exiting >= SEMIBLOCK || linebreaks > 1) {
|
||||||
|
paragraphStart = ret.length();
|
||||||
|
}
|
||||||
|
|
||||||
if (!insideTag) {
|
|
||||||
swallowLinebreaks = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
linebreaks = 0;
|
linebreaks = 0;
|
||||||
|
triggerBreak = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
@ -632,17 +674,18 @@ public final class HtmlEncoder {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '\n':
|
case '\n':
|
||||||
if (!insideTag) {
|
if (insideTag || insidePreTag) {
|
||||||
linebreaks++;
|
|
||||||
} else {
|
|
||||||
ret.append('\n');
|
ret.append('\n');
|
||||||
|
} else {
|
||||||
|
linebreaks++;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case '\r':
|
case '\r':
|
||||||
if (!insideTag) {
|
if (insideTag || insidePreTag) {
|
||||||
break;
|
ret.append('\r');
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case '>':
|
case '>':
|
||||||
|
|
||||||
|
@ -673,12 +716,19 @@ public final class HtmlEncoder {
|
||||||
openTags.pop();
|
openTags.pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exiting = entering;
|
||||||
|
if (exiting > 0) {
|
||||||
|
triggerBreak = !insidePreTag;
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ret.append(">");
|
ret.append(">");
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we still are inside any kind of tag
|
// check if we still are inside any kind of tag
|
||||||
insideTag = insideComment || insideMacroTag || insideHtmlTag;
|
insideTag = insideComment || insideMacroTag || insideHtmlTag;
|
||||||
|
insideCloseTag = insideTag;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -739,9 +789,16 @@ public final class HtmlEncoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
// add remaining newlines we may have collected
|
// add remaining newlines we may have collected
|
||||||
|
int swallowBreaks = 0;
|
||||||
|
if (paragraphs && exiting < BLOCK) {
|
||||||
|
ret.insert(paragraphStart, "<p>");
|
||||||
|
ret.append("</p>");
|
||||||
|
swallowBreaks = 2;
|
||||||
|
}
|
||||||
|
|
||||||
if (linebreaks > 0) {
|
if (linebreaks > 0) {
|
||||||
for (int i = 0; i < linebreaks; i++) {
|
for (int i = linebreaks-1; i>=0; i--) {
|
||||||
if (i >= swallowLinebreaks) {
|
if (i >= swallowBreaks && i > exiting) {
|
||||||
ret.append("<br />");
|
ret.append("<br />");
|
||||||
}
|
}
|
||||||
ret.append(newLine);
|
ret.append(newLine);
|
||||||
|
|
Loading…
Add table
Reference in a new issue