From 253ca75822bf0978b75362d63ff56da7978270a6 Mon Sep 17 00:00:00 2001 From: hns Date: Fri, 19 Sep 2003 16:12:01 +0000 Subject: [PATCH] Reworked formatting code Implemented paragraph formatting Fixed bug with newlines in pre tag --- src/helma/util/HtmlEncoder.java | 207 ++++++++++++++++++++------------ 1 file changed, 132 insertions(+), 75 deletions(-) diff --git a/src/helma/util/HtmlEncoder.java b/src/helma/util/HtmlEncoder.java index 3cc042d9..392e75f9 100644 --- a/src/helma/util/HtmlEncoder.java +++ b/src/helma/util/HtmlEncoder.java @@ -278,49 +278,53 @@ public final class HtmlEncoder { // conversion around them to look good. However, they differ // in how many newlines around them should ignored. These sets // help to treat each tag right in newline conversion. - static final HashSet swallowAll = new HashSet(); - static final HashSet swallowTwo = new HashSet(); - static final HashSet swallowOne = new HashSet(); + static final HashSet internalTags = new HashSet(); + static final HashSet blockTags = new HashSet(); + static final HashSet semiBlockTags = new HashSet(); static { // actual block level elements - swallowOne.add("address"); - swallowTwo.add("blockquote"); - swallowTwo.add("center"); - swallowOne.add("dir"); - swallowOne.add("div"); - swallowTwo.add("dl"); - swallowTwo.add("fieldset"); - swallowTwo.add("form"); - swallowTwo.add("h1"); - swallowTwo.add("h2"); - swallowTwo.add("h3"); - swallowTwo.add("h4"); - swallowTwo.add("h5"); - swallowTwo.add("h6"); - swallowTwo.add("hr"); - swallowTwo.add("isindex"); - swallowAll.add("menu"); - swallowAll.add("noframes"); - swallowAll.add("noscript"); - swallowTwo.add("ol"); - swallowTwo.add("p"); - swallowTwo.add("pre"); - swallowOne.add("table"); - swallowTwo.add("ul"); + semiBlockTags.add("address"); + semiBlockTags.add("dir"); + semiBlockTags.add("div"); + semiBlockTags.add("table"); + + blockTags.add("blockquote"); + blockTags.add("center"); + blockTags.add("dl"); + blockTags.add("fieldset"); + 
blockTags.add("form"); + blockTags.add("h1"); + blockTags.add("h2"); + blockTags.add("h3"); + blockTags.add("h4"); + blockTags.add("h5"); + blockTags.add("h6"); + blockTags.add("hr"); + blockTags.add("isindex"); + blockTags.add("ol"); + blockTags.add("p"); + blockTags.add("pre"); + blockTags.add("ul"); + + internalTags.add("menu"); + internalTags.add("noframes"); + internalTags.add("noscript"); /// to be treated as block level elements - swallowTwo.add("br"); - swallowTwo.add("dd"); - swallowTwo.add("dt"); - swallowTwo.add("frameset"); - swallowTwo.add("li"); - swallowAll.add("tbody"); - swallowTwo.add("td"); - swallowAll.add("tfoot"); - swallowOne.add("th"); - swallowAll.add("thead"); - swallowAll.add("tr"); + semiBlockTags.add("th"); + + blockTags.add("br"); + blockTags.add("dd"); + blockTags.add("dt"); + blockTags.add("frameset"); + blockTags.add("li"); + blockTags.add("td"); + + internalTags.add("tbody"); + internalTags.add("tfoot"); + internalTags.add("thead"); + internalTags.add("tr"); } // set of tags that are always empty @@ -347,8 +351,12 @@ public final class HtmlEncoder { static final byte TAG_ATT_NAME = 2; static final byte TAG_ATT_VAL = 3; - static final String newLine = System.getProperty("line.separator"); + static final byte TEXT = 0; + static final byte SEMIBLOCK = 1; + static final byte BLOCK = 2; + static final byte INTERNAL = 3; + static final String newLine = System.getProperty("line.separator"); /** * Do "smart" encodging on a string. This means that valid HTML entities and tags, @@ -369,7 +377,7 @@ public final class HtmlEncoder { // try to make stringbuffer large enough from the start StringBuffer ret = new StringBuffer(Math.round(l * 1.4f)); - encode(str, ret, null); + encode(str, ret, false, null); return ret.toString(); } @@ -380,21 +388,39 @@ public final class HtmlEncoder { * other occurrences of '<', '>' and '&' are encoded to HTML entities. 
*/ public final static void encode(String str, StringBuffer ret) { - encode(str, ret, null); + encode(str, ret, false, null); } /** * Do "smart" encodging on a string. This means that valid HTML entities and tags, * Helma macros and HTML comments are passed through unescaped, while * other occurrences of '<', '>' and '&' are encoded to HTML entities. + * + * @param str the string to encode + * @param ret the string buffer to encode to + * @param paragraphs if true use p tags for paragraphs, otherwise just use br's + * @param allowedTags a set containing the names of allowed tags as strings. All other + * tags will be escaped */ - public final static void encode(String str, StringBuffer ret, Set allowedTags) { + public final static void encode(String str, StringBuffer ret, + boolean paragraphs, Set allowedTags) { if (str == null) { return; } int l = str.length(); + // where to insert the
<p>
tag in case we want to create a paragraph later on + int paragraphStart = ret.length(); + + // what kind of element/text are we leaving and entering? + // this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL + // depending on this information, we decide whether and how to insert + // paragraphs and line breaks. "entering" a tag means we're at the '<' + // and exiting means we're at the '>', not that it's a start or close tag. + byte entering = TEXT; + byte exiting = TEXT; + Stack openTags = new Stack(); // are we currently within a < and a > that consitute some kind of tag? @@ -424,15 +450,14 @@ public final class HtmlEncoder { char htmlQuoteChar = '\u0000'; char macroQuoteChar = '\u0000'; - // number of newlines to ignore in \n ->
conversion - int swallowLinebreaks = 0; - // number of newlines met since the last non-whitespace character int linebreaks = 0; // did we meet a backslash escape? boolean escape = false; + boolean triggerBreak = false; + for (int i = 0; i < l; i++) { char c = str.charAt(i); @@ -475,20 +500,19 @@ public final class HtmlEncoder { htmlQuoteChar = '\u0000'; htmlTagMode = TAG_NAME; - // set ignoreNewline on some tags, depending on wheather they're - // being opened or closed. - // what's going on here? we switch newline encoding on inside some tags, for - // others we switch it on when they're closed - linebreaks = Math.max(linebreaks - swallowLinebreaks, 0); + exiting = entering; + entering = TEXT; - if (swallowAll.contains(tagName)) { - swallowLinebreaks = 1000; - } else if (swallowTwo.contains(tagName)) { - swallowLinebreaks = 2; - } else if (swallowOne.contains(tagName)) { - swallowLinebreaks = 1; - } else { - swallowLinebreaks = 0; + if (internalTags.contains(tagName)) { + entering = INTERNAL; + } else if (blockTags.contains(tagName)) { + entering = BLOCK; + } else if (semiBlockTags.contains(tagName)) { + entering = paragraphs ? 
BLOCK : SEMIBLOCK; + } + + if (entering > 0) { + triggerBreak = !insidePreTag; } if (insideCloseTag) { @@ -513,7 +537,6 @@ public final class HtmlEncoder { openTags.pop(); } else { openTags.push(tagName); - swallowLinebreaks = Math.max(swallowLinebreaks - 1, 0); } if ("code".equals(tagName) && !insideCloseTag) { @@ -526,25 +549,44 @@ public final class HtmlEncoder { } } } - } - // if (i < l-2) + } // if (i < l-2) } - if ((linebreaks > 0 || swallowLinebreaks > 0) && !Character.isWhitespace(c)) { - if (!insidePreTag) { - for (int k = 0; k < linebreaks; k++) { - if (k >= swallowLinebreaks) { + if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) { + + if (!insideTag) { + exiting = entering; + entering = TEXT; + if (exiting >= SEMIBLOCK) { + paragraphStart = ret.length(); + } + } + + if (entering != INTERNAL && exiting != INTERNAL) { + int swallowBreaks = 0; + if (paragraphs && (entering != BLOCK || exiting != BLOCK) && + (exiting < BLOCK) && + (entering >= SEMIBLOCK || linebreaks > 1) && + paragraphStart < ret.length()) { + ret.insert(paragraphStart, "

<p>"); + ret.append("</p>"); + swallowBreaks = 2; + } + + for (int k = linebreaks-1; k>=0; k--) { + if (k >= swallowBreaks && k >= entering && k >= exiting) { ret.append("<br />
"); } ret.append(newLine); } - } + if (exiting >= SEMIBLOCK || linebreaks > 1) { + paragraphStart = ret.length(); + } - if (!insideTag) { - swallowLinebreaks = 0; } linebreaks = 0; + triggerBreak = false; } switch (c) { @@ -632,17 +674,18 @@ public final class HtmlEncoder { break; case '\n': - if (!insideTag) { - linebreaks++; - } else { + if (insideTag || insidePreTag) { ret.append('\n'); + } else { + linebreaks++; } break; case '\r': - if (!insideTag) { - break; + if (insideTag || insidePreTag) { + ret.append('\r'); } + break; case '>': @@ -673,12 +716,19 @@ public final class HtmlEncoder { openTags.pop(); } } + + exiting = entering; + if (exiting > 0) { + triggerBreak = !insidePreTag; + } + } else { ret.append(">"); } // check if we still are inside any kind of tag insideTag = insideComment || insideMacroTag || insideHtmlTag; + insideCloseTag = insideTag; break; @@ -739,9 +789,16 @@ public final class HtmlEncoder { } // add remaining newlines we may have collected + int swallowBreaks = 0; + if (paragraphs && exiting < BLOCK) { + ret.insert(paragraphStart, "

<p>"); + ret.append("</p>"); + swallowBreaks = 2; + } + if (linebreaks > 0) { - for (int i = 0; i < linebreaks; i++) { - if (i >= swallowLinebreaks) { + for (int i = linebreaks-1; i>=0; i--) { + if (i >= swallowBreaks && i > exiting) { ret.append("<br />
"); } ret.append(newLine);