Reworked formatting code

Implemented paragraph formatting
Fixed bug with newlines in pre tag
This commit is contained in:
hns 2003-09-19 16:12:01 +00:00
parent 2015aeef89
commit 253ca75822

View file

@ -278,49 +278,53 @@ public final class HtmlEncoder {
// conversion around them to look good. However, they differ // conversion around them to look good. However, they differ
// in how many newlines around them should be ignored. These sets // in how many newlines around them should be ignored. These sets
// help to treat each tag right in newline conversion. // help to treat each tag right in newline conversion.
static final HashSet swallowAll = new HashSet(); static final HashSet internalTags = new HashSet();
static final HashSet swallowTwo = new HashSet(); static final HashSet blockTags = new HashSet();
static final HashSet swallowOne = new HashSet(); static final HashSet semiBlockTags = new HashSet();
static { static {
// actual block level elements // actual block level elements
swallowOne.add("address"); semiBlockTags.add("address");
swallowTwo.add("blockquote"); semiBlockTags.add("dir");
swallowTwo.add("center"); semiBlockTags.add("div");
swallowOne.add("dir"); semiBlockTags.add("table");
swallowOne.add("div");
swallowTwo.add("dl"); blockTags.add("blockquote");
swallowTwo.add("fieldset"); blockTags.add("center");
swallowTwo.add("form"); blockTags.add("dl");
swallowTwo.add("h1"); blockTags.add("fieldset");
swallowTwo.add("h2"); blockTags.add("form");
swallowTwo.add("h3"); blockTags.add("h1");
swallowTwo.add("h4"); blockTags.add("h2");
swallowTwo.add("h5"); blockTags.add("h3");
swallowTwo.add("h6"); blockTags.add("h4");
swallowTwo.add("hr"); blockTags.add("h5");
swallowTwo.add("isindex"); blockTags.add("h6");
swallowAll.add("menu"); blockTags.add("hr");
swallowAll.add("noframes"); blockTags.add("isindex");
swallowAll.add("noscript"); blockTags.add("ol");
swallowTwo.add("ol"); blockTags.add("p");
swallowTwo.add("p"); blockTags.add("pre");
swallowTwo.add("pre"); blockTags.add("ul");
swallowOne.add("table");
swallowTwo.add("ul"); internalTags.add("menu");
internalTags.add("noframes");
internalTags.add("noscript");
/// to be treated as block level elements /// to be treated as block level elements
swallowTwo.add("br"); semiBlockTags.add("th");
swallowTwo.add("dd");
swallowTwo.add("dt"); blockTags.add("br");
swallowTwo.add("frameset"); blockTags.add("dd");
swallowTwo.add("li"); blockTags.add("dt");
swallowAll.add("tbody"); blockTags.add("frameset");
swallowTwo.add("td"); blockTags.add("li");
swallowAll.add("tfoot"); blockTags.add("td");
swallowOne.add("th");
swallowAll.add("thead"); internalTags.add("tbody");
swallowAll.add("tr"); internalTags.add("tfoot");
internalTags.add("thead");
internalTags.add("tr");
} }
// set of tags that are always empty // set of tags that are always empty
@ -347,8 +351,12 @@ public final class HtmlEncoder {
static final byte TAG_ATT_NAME = 2; static final byte TAG_ATT_NAME = 2;
static final byte TAG_ATT_VAL = 3; static final byte TAG_ATT_VAL = 3;
static final String newLine = System.getProperty("line.separator"); static final byte TEXT = 0;
static final byte SEMIBLOCK = 1;
static final byte BLOCK = 2;
static final byte INTERNAL = 3;
static final String newLine = System.getProperty("line.separator");
/** /**
* Do "smart" encoding on a string. This means that valid HTML entities and tags, * Do "smart" encoding on a string. This means that valid HTML entities and tags,
@ -369,7 +377,7 @@ public final class HtmlEncoder {
// try to make stringbuffer large enough from the start // try to make stringbuffer large enough from the start
StringBuffer ret = new StringBuffer(Math.round(l * 1.4f)); StringBuffer ret = new StringBuffer(Math.round(l * 1.4f));
encode(str, ret, null); encode(str, ret, false, null);
return ret.toString(); return ret.toString();
} }
@ -380,21 +388,39 @@ public final class HtmlEncoder {
* other occurrences of '<', '>' and '&' are encoded to HTML entities. * other occurrences of '<', '>' and '&' are encoded to HTML entities.
*/ */
public final static void encode(String str, StringBuffer ret) { public final static void encode(String str, StringBuffer ret) {
encode(str, ret, null); encode(str, ret, false, null);
} }
/** /**
* Do "smart" encoding on a string. This means that valid HTML entities and tags, * Do "smart" encoding on a string. This means that valid HTML entities and tags,
* Helma macros and HTML comments are passed through unescaped, while * Helma macros and HTML comments are passed through unescaped, while
* other occurrences of '<', '>' and '&' are encoded to HTML entities. * other occurrences of '<', '>' and '&' are encoded to HTML entities.
*
* @param str the string to encode
* @param ret the string buffer to encode to
* @param paragraphs if true use p tags for paragraphs, otherwise just use br's
* @param allowedTags a set containing the names of allowed tags as strings. All other
* tags will be escaped
*/ */
public final static void encode(String str, StringBuffer ret, Set allowedTags) { public final static void encode(String str, StringBuffer ret,
boolean paragraphs, Set allowedTags) {
if (str == null) { if (str == null) {
return; return;
} }
int l = str.length(); int l = str.length();
// where to insert the <p> tag in case we want to create a paragraph later on
int paragraphStart = ret.length();
// what kind of element/text are we leaving and entering?
// this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL
// depending on this information, we decide whether and how to insert
// paragraphs and line breaks. "entering" a tag means we're at the '<'
// and exiting means we're at the '>', not that it's a start or close tag.
byte entering = TEXT;
byte exiting = TEXT;
Stack openTags = new Stack(); Stack openTags = new Stack();
// are we currently within a < and a > that constitute some kind of tag? // are we currently within a < and a > that constitute some kind of tag?
@ -424,15 +450,14 @@ public final class HtmlEncoder {
char htmlQuoteChar = '\u0000'; char htmlQuoteChar = '\u0000';
char macroQuoteChar = '\u0000'; char macroQuoteChar = '\u0000';
// number of newlines to ignore in \n -> <br> conversion
int swallowLinebreaks = 0;
// number of newlines met since the last non-whitespace character // number of newlines met since the last non-whitespace character
int linebreaks = 0; int linebreaks = 0;
// did we meet a backslash escape? // did we meet a backslash escape?
boolean escape = false; boolean escape = false;
boolean triggerBreak = false;
for (int i = 0; i < l; i++) { for (int i = 0; i < l; i++) {
char c = str.charAt(i); char c = str.charAt(i);
@ -475,20 +500,19 @@ public final class HtmlEncoder {
htmlQuoteChar = '\u0000'; htmlQuoteChar = '\u0000';
htmlTagMode = TAG_NAME; htmlTagMode = TAG_NAME;
// set ignoreNewline on some tags, depending on whether they're exiting = entering;
// being opened or closed. entering = TEXT;
// what's going on here? we switch newline encoding on inside some tags, for
// others we switch it on when they're closed
linebreaks = Math.max(linebreaks - swallowLinebreaks, 0);
if (swallowAll.contains(tagName)) { if (internalTags.contains(tagName)) {
swallowLinebreaks = 1000; entering = INTERNAL;
} else if (swallowTwo.contains(tagName)) { } else if (blockTags.contains(tagName)) {
swallowLinebreaks = 2; entering = BLOCK;
} else if (swallowOne.contains(tagName)) { } else if (semiBlockTags.contains(tagName)) {
swallowLinebreaks = 1; entering = paragraphs ? BLOCK : SEMIBLOCK;
} else { }
swallowLinebreaks = 0;
if (entering > 0) {
triggerBreak = !insidePreTag;
} }
if (insideCloseTag) { if (insideCloseTag) {
@ -513,7 +537,6 @@ public final class HtmlEncoder {
openTags.pop(); openTags.pop();
} else { } else {
openTags.push(tagName); openTags.push(tagName);
swallowLinebreaks = Math.max(swallowLinebreaks - 1, 0);
} }
if ("code".equals(tagName) && !insideCloseTag) { if ("code".equals(tagName) && !insideCloseTag) {
@ -526,25 +549,44 @@ public final class HtmlEncoder {
} }
} }
} }
} } // if (i < l-2)
// if (i < l-2)
} }
if ((linebreaks > 0 || swallowLinebreaks > 0) && !Character.isWhitespace(c)) { if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) {
if (!insidePreTag) {
for (int k = 0; k < linebreaks; k++) { if (!insideTag) {
if (k >= swallowLinebreaks) { exiting = entering;
entering = TEXT;
if (exiting >= SEMIBLOCK) {
paragraphStart = ret.length();
}
}
if (entering != INTERNAL && exiting != INTERNAL) {
int swallowBreaks = 0;
if (paragraphs && (entering != BLOCK || exiting != BLOCK) &&
(exiting < BLOCK) &&
(entering >= SEMIBLOCK || linebreaks > 1) &&
paragraphStart < ret.length()) {
ret.insert(paragraphStart, "<p>");
ret.append("</p>");
swallowBreaks = 2;
}
for (int k = linebreaks-1; k>=0; k--) {
if (k >= swallowBreaks && k >= entering && k >= exiting) {
ret.append("<br />"); ret.append("<br />");
} }
ret.append(newLine); ret.append(newLine);
} }
} if (exiting >= SEMIBLOCK || linebreaks > 1) {
paragraphStart = ret.length();
}
if (!insideTag) {
swallowLinebreaks = 0;
} }
linebreaks = 0; linebreaks = 0;
triggerBreak = false;
} }
switch (c) { switch (c) {
@ -632,17 +674,18 @@ public final class HtmlEncoder {
break; break;
case '\n': case '\n':
if (!insideTag) { if (insideTag || insidePreTag) {
linebreaks++;
} else {
ret.append('\n'); ret.append('\n');
} else {
linebreaks++;
} }
break; break;
case '\r': case '\r':
if (!insideTag) { if (insideTag || insidePreTag) {
break; ret.append('\r');
} }
break;
case '>': case '>':
@ -673,12 +716,19 @@ public final class HtmlEncoder {
openTags.pop(); openTags.pop();
} }
} }
exiting = entering;
if (exiting > 0) {
triggerBreak = !insidePreTag;
}
} else { } else {
ret.append("&gt;"); ret.append("&gt;");
} }
// check if we still are inside any kind of tag // check if we still are inside any kind of tag
insideTag = insideComment || insideMacroTag || insideHtmlTag; insideTag = insideComment || insideMacroTag || insideHtmlTag;
insideCloseTag = insideTag;
break; break;
@ -739,9 +789,16 @@ public final class HtmlEncoder {
} }
// add remaining newlines we may have collected // add remaining newlines we may have collected
int swallowBreaks = 0;
if (paragraphs && exiting < BLOCK) {
ret.insert(paragraphStart, "<p>");
ret.append("</p>");
swallowBreaks = 2;
}
if (linebreaks > 0) { if (linebreaks > 0) {
for (int i = 0; i < linebreaks; i++) { for (int i = linebreaks-1; i>=0; i--) {
if (i >= swallowLinebreaks) { if (i >= swallowBreaks && i > exiting) {
ret.append("<br />"); ret.append("<br />");
} }
ret.append(newLine); ret.append(newLine);