diff --git a/src/helma/util/HtmlEncoder.java b/src/helma/util/HtmlEncoder.java
index bcdb48d1..987ff043 100644
--- a/src/helma/util/HtmlEncoder.java
+++ b/src/helma/util/HtmlEncoder.java
@@ -294,50 +294,61 @@ public final class HtmlEncoder {
}
/**
- *
+ * Do "smart" encoding on a string. This means that valid HTML entities and tags,
+ * Helma macros and HTML comments are passed through unescaped, while
+ * other occurrences of '<', '>' and '&' are encoded to HTML entities.
*/
public final static void encode (String str, StringBuffer ret) {
if (str == null)
return;
- char[] chars = str.toCharArray ();
- int l = chars.length;
+ int l = str.length();
- // are we currently within a < and a >?
- boolean insideTag=false;
+ // are we currently within a < and a > that constitute some kind of tag?
+ // we use tag balancing to know whether we are inside a tag (and should
+ // pass things through unchanged) or outside (and should encode stuff).
+ boolean insideTag = false;
+ // are we inside an HTML tag?
+ boolean insideHtmlTag = false;
// if we are inside a <code> tag, we encode everything to make
// documentation work easier
boolean insideCodeTag = false;
- // are we within a macro tag?
+ // are we within a Helma <% macro %> tag? We treat macro tags and
+ // comments specially, since we can't rely on tag balancing
+ // to know when we leave a macro tag or comment.
boolean insideMacroTag = false;
// are we inside an HTML comment?
boolean insideComment = false;
+ // the quotation mark we are in within an HTML or Macro tag, if any
+ char htmlQuoteChar = '\u0000';
+ char macroQuoteChar = '\u0000';
// the difference between swallowOneNewline and ignoreNewline is that
// swallowOneNewline is just effective once (for the next newline)
boolean ignoreNewline = false;
boolean swallowOneNewline = false;
for (int i=0; i tagStart && j < l) {
- String tagName = new String (chars, tagStart, j-tagStart).toLowerCase ();
- if ("code".equals (tagName) && insideCloseTag && insideCodeTag)
- insideCodeTag = false;
- if (allTags.contains (tagName) && !insideCodeTag) {
- insideTag = true;
- ret.append ('<');
- // set ignoreNewline on some tags, depending on wheather they're
- // being opened or closed.
- // what's going on here? we switch newline encoding on inside some tags, for
- // others we switch it on when they're closed
- if (encodeLinebreakTags.contains (tagName)) {
- ignoreNewline = insideCloseTag;
- swallowOneNewline = true;
- } else if (suppressLinebreakTags.contains (tagName)) {
- ignoreNewline = !insideCloseTag;
- swallowOneNewline = true;
- } else if ("p".equals (tagName) || "blockquote".equals (tagName) || "bq".equals (tagName)) {
- swallowOneNewline = true;
+ if (i < l-2) {
+ if (!insideMacroTag && '%' == str.charAt(i+1)) {
+ // this is the beginning of a Helma macro tag
+ insideMacroTag = insideTag = true;
+ macroQuoteChar = '\u0000';
+ } else if ('!' == str.charAt(i+1) && '-' == str.charAt(i+2)) {
+ // the beginning of an HTML comment?
+ insideComment = insideTag = (i tagStart && j < l) {
+ String tagName = str.substring (tagStart, j).toLowerCase();
+ if ("code".equals (tagName) && insideCloseTag && insideCodeTag)
+ insideCodeTag = false;
+ if (allTags.contains (tagName) && !insideCodeTag) {
+ insideHtmlTag = insideTag = true;
+ htmlQuoteChar = '\u0000';
+ // set ignoreNewline on some tags, depending on whether they're
+ // being opened or closed.
+ // what's going on here? we switch newline encoding on inside some tags, for
+ // others we switch it on when they're closed
+ if (encodeLinebreakTags.contains (tagName)) {
+ ignoreNewline = insideCloseTag;
+ swallowOneNewline = true;
+ } else if (suppressLinebreakTags.contains (tagName)) {
+ ignoreNewline = !insideCloseTag;
+ swallowOneNewline = true;
+ } else if ("p".equals (tagName) || "blockquote".equals (tagName) || "bq".equals (tagName)) {
+ swallowOneNewline = true;
+ }
+ if ("code".equals (tagName) && !insideCloseTag)
+ insideCodeTag = true;
}
- if ("code".equals (tagName) && !insideCloseTag)
- insideCodeTag = true;
- break;
}
}
} // if (i < l-2)
- ret.append ("<");
+ if (insideTag)
+ ret.append ('<');
+ else
+ ret.append ("<");
+ break;
+ case '"':
+ case '\'':
+ ret.append (c);
+ if (!insideComment) {
+ if (insideMacroTag) {
+ if (macroQuoteChar == c)
+ macroQuoteChar = '\u0000';
+ else if (macroQuoteChar == '\u0000')
+ macroQuoteChar = c;
+ } else if (insideHtmlTag) {
+ if (htmlQuoteChar == c)
+ htmlQuoteChar = '\u0000';
+ else if (htmlQuoteChar == '\u0000')
+ htmlQuoteChar = c;
+ }
+ }
break;
case '\n':
ret.append ('\n');
- if (!insideTag && !ignoreNewline && !swallowOneNewline)
- ret.append ("
");
- if (!insideTag)
+ if (!insideTag) {
+ if (!ignoreNewline && !swallowOneNewline)
+ ret.append ("
");
swallowOneNewline = false;
+ }
break;
case '>':
- if (insideTag) {
+ // For Helma macro tags and comments, we overrule tag balancing,
+ // i.e. we don't require that '<' and '>' be balanced within
+ // macros and comments. Rather, we check for the matching closing tag.
+ if (insideComment) {
ret.append ('>');
- if (insideMacroTag)
- insideMacroTag = insideTag = !(chars[i-1] == '%');
- else if (insideComment)
- insideComment = insideTag = !(chars[i-2] == '-' && chars[i-1] == '-');
- else
- insideTag = false;
+ insideComment = !(str.charAt(i-2) == '-' && str.charAt(i-1) == '-');
+ } else if (insideMacroTag) {
+ ret.append ('>');
+ insideMacroTag = !(str.charAt(i-1) == '%' && macroQuoteChar == '\u0000');
+ } else if (insideHtmlTag) {
+ ret.append ('>');
+ // only leave HTML tag if quotation marks are balanced
+ // within that tag.
+ insideHtmlTag = htmlQuoteChar != '\u0000';
} else {
ret.append (">");
}
+ // check if we still are inside any kind of tag
+ insideTag = insideComment || insideMacroTag || insideHtmlTag;
break;
default:
// ret.append (c);
@@ -428,7 +465,7 @@ public final class HtmlEncoder {
ret.append ((int) c);
ret.append (";");
}
- if (!insideTag && !Character.isWhitespace (c))
+ if (swallowOneNewline && !insideTag && !Character.isWhitespace (c))
swallowOneNewline = false;
}
}