diff --git a/src/helma/util/HtmlEncoder.java b/src/helma/util/HtmlEncoder.java index 3cc042d9..392e75f9 100644 --- a/src/helma/util/HtmlEncoder.java +++ b/src/helma/util/HtmlEncoder.java @@ -278,49 +278,53 @@ public final class HtmlEncoder { // conversion around them to look good. However, they differ // in how many newlines around them should ignored. These sets // help to treat each tag right in newline conversion. - static final HashSet swallowAll = new HashSet(); - static final HashSet swallowTwo = new HashSet(); - static final HashSet swallowOne = new HashSet(); + static final HashSet internalTags = new HashSet(); + static final HashSet blockTags = new HashSet(); + static final HashSet semiBlockTags = new HashSet(); static { // actual block level elements - swallowOne.add("address"); - swallowTwo.add("blockquote"); - swallowTwo.add("center"); - swallowOne.add("dir"); - swallowOne.add("div"); - swallowTwo.add("dl"); - swallowTwo.add("fieldset"); - swallowTwo.add("form"); - swallowTwo.add("h1"); - swallowTwo.add("h2"); - swallowTwo.add("h3"); - swallowTwo.add("h4"); - swallowTwo.add("h5"); - swallowTwo.add("h6"); - swallowTwo.add("hr"); - swallowTwo.add("isindex"); - swallowAll.add("menu"); - swallowAll.add("noframes"); - swallowAll.add("noscript"); - swallowTwo.add("ol"); - swallowTwo.add("p"); - swallowTwo.add("pre"); - swallowOne.add("table"); - swallowTwo.add("ul"); + semiBlockTags.add("address"); + semiBlockTags.add("dir"); + semiBlockTags.add("div"); + semiBlockTags.add("table"); + + blockTags.add("blockquote"); + blockTags.add("center"); + blockTags.add("dl"); + blockTags.add("fieldset"); + blockTags.add("form"); + blockTags.add("h1"); + blockTags.add("h2"); + blockTags.add("h3"); + blockTags.add("h4"); + blockTags.add("h5"); + blockTags.add("h6"); + blockTags.add("hr"); + blockTags.add("isindex"); + blockTags.add("ol"); + blockTags.add("p"); + blockTags.add("pre"); + blockTags.add("ul"); + + internalTags.add("menu"); + internalTags.add("noframes"); + internalTags.add("noscript"); /// to be treated as block level elements - swallowTwo.add("br"); - swallowTwo.add("dd"); - swallowTwo.add("dt"); - swallowTwo.add("frameset"); - swallowTwo.add("li"); - swallowAll.add("tbody"); - swallowTwo.add("td"); - swallowAll.add("tfoot"); - swallowOne.add("th"); - swallowAll.add("thead"); - swallowAll.add("tr"); + semiBlockTags.add("th"); + + blockTags.add("br"); + blockTags.add("dd"); + blockTags.add("dt"); + blockTags.add("frameset"); + blockTags.add("li"); + blockTags.add("td"); + + internalTags.add("tbody"); + internalTags.add("tfoot"); + internalTags.add("thead"); + internalTags.add("tr"); } // set of tags that are always empty @@ -347,8 +351,12 @@ public final class HtmlEncoder { static final byte TAG_ATT_NAME = 2; static final byte TAG_ATT_VAL = 3; - static final String newLine = System.getProperty("line.separator"); + static final byte TEXT = 0; + static final byte SEMIBLOCK = 1; + static final byte BLOCK = 2; + static final byte INTERNAL = 3; + static final String newLine = System.getProperty("line.separator"); /** * Do "smart" encodging on a string. This means that valid HTML entities and tags, @@ -369,7 +377,7 @@ public final class HtmlEncoder { // try to make stringbuffer large enough from the start StringBuffer ret = new StringBuffer(Math.round(l * 1.4f)); - encode(str, ret, null); + encode(str, ret, false, null); return ret.toString(); } @@ -380,21 +388,39 @@ public final class HtmlEncoder { * other occurrences of '<', '>' and '&' are encoded to HTML entities. */ public final static void encode(String str, StringBuffer ret) { - encode(str, ret, null); + encode(str, ret, false, null); } /** * Do "smart" encodging on a string. This means that valid HTML entities and tags, * Helma macros and HTML comments are passed through unescaped, while * other occurrences of '<', '>' and '&' are encoded to HTML entities. + * + * @param str the string to encode + * @param ret the string buffer to encode to + * @param paragraphs if true use p tags for paragraphs, otherwise just use br's + * @param allowedTags a set containing the names of allowed tags as strings. All other + * tags will be escaped */ - public final static void encode(String str, StringBuffer ret, Set allowedTags) { + public final static void encode(String str, StringBuffer ret, + boolean paragraphs, Set allowedTags) { if (str == null) { return; } int l = str.length(); + // where to insert the
tag in case we want to create a paragraph later on
+ int paragraphStart = ret.length();
+
+ // what kind of element/text are we leaving and entering?
+ // this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL
+ // depending on this information, we decide whether and how to insert
+ // paragraphs and line breaks. "entering" a tag means we're at the '<'
+ // and exiting means we're at the '>', not that it's a start or close tag.
+ byte entering = TEXT;
+ byte exiting = TEXT;
+
Stack openTags = new Stack();
// are we currently within a < and a > that consitute some kind of tag?
@@ -424,15 +450,14 @@ public final class HtmlEncoder {
char htmlQuoteChar = '\u0000';
char macroQuoteChar = '\u0000';
- // number of newlines to ignore in \n ->
conversion
- int swallowLinebreaks = 0;
-
// number of newlines met since the last non-whitespace character
int linebreaks = 0;
// did we meet a backslash escape?
boolean escape = false;
+ boolean triggerBreak = false;
+
for (int i = 0; i < l; i++) {
char c = str.charAt(i);
@@ -475,20 +500,19 @@ public final class HtmlEncoder {
htmlQuoteChar = '\u0000';
htmlTagMode = TAG_NAME;
- // set ignoreNewline on some tags, depending on wheather they're
- // being opened or closed.
- // what's going on here? we switch newline encoding on inside some tags, for
- // others we switch it on when they're closed
- linebreaks = Math.max(linebreaks - swallowLinebreaks, 0);
+ exiting = entering;
+ entering = TEXT;
- if (swallowAll.contains(tagName)) {
- swallowLinebreaks = 1000;
- } else if (swallowTwo.contains(tagName)) {
- swallowLinebreaks = 2;
- } else if (swallowOne.contains(tagName)) {
- swallowLinebreaks = 1;
- } else {
- swallowLinebreaks = 0;
+ if (internalTags.contains(tagName)) {
+ entering = INTERNAL;
+ } else if (blockTags.contains(tagName)) {
+ entering = BLOCK;
+ } else if (semiBlockTags.contains(tagName)) {
+ entering = paragraphs ? BLOCK : SEMIBLOCK;
+ }
+
+ if (entering > 0) {
+ triggerBreak = !insidePreTag;
}
if (insideCloseTag) {
@@ -513,7 +537,6 @@ public final class HtmlEncoder {
openTags.pop();
} else {
openTags.push(tagName);
- swallowLinebreaks = Math.max(swallowLinebreaks - 1, 0);
}
if ("code".equals(tagName) && !insideCloseTag) {
@@ -526,25 +549,44 @@ public final class HtmlEncoder {
}
}
}
- }
- // if (i < l-2)
+ } // if (i < l-2)
}
- if ((linebreaks > 0 || swallowLinebreaks > 0) && !Character.isWhitespace(c)) {
- if (!insidePreTag) {
- for (int k = 0; k < linebreaks; k++) {
- if (k >= swallowLinebreaks) {
+ if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) {
+
+ if (!insideTag) {
+ exiting = entering;
+ entering = TEXT;
+ if (exiting >= SEMIBLOCK) {
+ paragraphStart = ret.length();
+ }
+ }
+
+ if (entering != INTERNAL && exiting != INTERNAL) {
+ int swallowBreaks = 0;
+ if (paragraphs && (entering != BLOCK || exiting != BLOCK) &&
+ (exiting < BLOCK) &&
+ (entering >= SEMIBLOCK || linebreaks > 1) &&
+ paragraphStart < ret.length()) {
+ ret.insert(paragraphStart, "
"); + ret.append("
"); + swallowBreaks = 2; + } + + for (int k = linebreaks-1; k>=0; k--) { + if (k >= swallowBreaks && k >= entering && k >= exiting) { ret.append(""); + ret.append("
"); + swallowBreaks = 2; + } + if (linebreaks > 0) { - for (int i = 0; i < linebreaks; i++) { - if (i >= swallowLinebreaks) { + for (int i = linebreaks-1; i>=0; i--) { + if (i >= swallowBreaks && i > exiting) { ret.append("