HTML encoding is now smarter about encoding &, < and >.
If they are part of a valid HTML entity reference, an HTML tag or a Helma macro, they are passed through unchanged; otherwise they are encoded to &amp;amp;, &amp;lt; or &amp;gt; (that is, the entities for &, < and >). Another new feature that should make writing documentation on Helma easier is that everything is encoded if it is placed within a <code> tag.
This commit is contained in:
parent
52a97b1a46
commit
962b2b6e6c
1 changed files with 221 additions and 67 deletions
|
@ -158,6 +158,125 @@ public final class HtmlEncoder {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static final HashSet allTags = new HashSet ();
|
||||||
|
static {
|
||||||
|
allTags.add ("a");
|
||||||
|
allTags.add ("abbr");
|
||||||
|
allTags.add ("address");
|
||||||
|
allTags.add ("applet");
|
||||||
|
allTags.add ("area");
|
||||||
|
allTags.add ("b");
|
||||||
|
allTags.add ("base");
|
||||||
|
allTags.add ("basefont");
|
||||||
|
allTags.add ("bgsound");
|
||||||
|
allTags.add ("big");
|
||||||
|
allTags.add ("blink");
|
||||||
|
allTags.add ("blockquote");
|
||||||
|
allTags.add ("bq");
|
||||||
|
allTags.add ("body");
|
||||||
|
allTags.add ("br");
|
||||||
|
allTags.add ("button");
|
||||||
|
allTags.add ("caption");
|
||||||
|
allTags.add ("center");
|
||||||
|
allTags.add ("cite");
|
||||||
|
allTags.add ("code");
|
||||||
|
allTags.add ("col");
|
||||||
|
allTags.add ("colgroup");
|
||||||
|
allTags.add ("del");
|
||||||
|
allTags.add ("dir");
|
||||||
|
allTags.add ("div");
|
||||||
|
allTags.add ("dl");
|
||||||
|
allTags.add ("dt");
|
||||||
|
allTags.add ("dd");
|
||||||
|
allTags.add ("em");
|
||||||
|
allTags.add ("embed");
|
||||||
|
allTags.add ("fieldset");
|
||||||
|
allTags.add ("font");
|
||||||
|
allTags.add ("form");
|
||||||
|
allTags.add ("frame");
|
||||||
|
allTags.add ("frameset");
|
||||||
|
allTags.add ("h1");
|
||||||
|
allTags.add ("h2");
|
||||||
|
allTags.add ("h3");
|
||||||
|
allTags.add ("h4");
|
||||||
|
allTags.add ("h5");
|
||||||
|
allTags.add ("h6");
|
||||||
|
allTags.add ("head");
|
||||||
|
allTags.add ("html");
|
||||||
|
allTags.add ("i");
|
||||||
|
allTags.add ("iframe");
|
||||||
|
allTags.add ("img");
|
||||||
|
allTags.add ("input");
|
||||||
|
allTags.add ("ins");
|
||||||
|
allTags.add ("isindex");
|
||||||
|
allTags.add ("kbd");
|
||||||
|
allTags.add ("li");
|
||||||
|
allTags.add ("link");
|
||||||
|
allTags.add ("listing");
|
||||||
|
allTags.add ("map");
|
||||||
|
allTags.add ("marquee");
|
||||||
|
allTags.add ("menu");
|
||||||
|
allTags.add ("meta");
|
||||||
|
allTags.add ("nobr");
|
||||||
|
allTags.add ("noframes");
|
||||||
|
allTags.add ("object");
|
||||||
|
allTags.add ("ol");
|
||||||
|
allTags.add ("option");
|
||||||
|
allTags.add ("optgroup");
|
||||||
|
allTags.add ("p");
|
||||||
|
allTags.add ("param");
|
||||||
|
allTags.add ("plaintext");
|
||||||
|
allTags.add ("pre");
|
||||||
|
allTags.add ("q");
|
||||||
|
allTags.add ("samp");
|
||||||
|
allTags.add ("script");
|
||||||
|
allTags.add ("select");
|
||||||
|
allTags.add ("small");
|
||||||
|
allTags.add ("span");
|
||||||
|
allTags.add ("strike");
|
||||||
|
allTags.add ("strong");
|
||||||
|
allTags.add ("style");
|
||||||
|
allTags.add ("sub");
|
||||||
|
allTags.add ("sup");
|
||||||
|
allTags.add ("table");
|
||||||
|
allTags.add ("tbody");
|
||||||
|
allTags.add ("td");
|
||||||
|
allTags.add ("textarea");
|
||||||
|
allTags.add ("tfoot");
|
||||||
|
allTags.add ("th");
|
||||||
|
allTags.add ("thead");
|
||||||
|
allTags.add ("title");
|
||||||
|
allTags.add ("tr");
|
||||||
|
allTags.add ("tt");
|
||||||
|
allTags.add ("u");
|
||||||
|
allTags.add ("ul");
|
||||||
|
allTags.add ("var");
|
||||||
|
allTags.add ("wbr");
|
||||||
|
allTags.add ("xmp");
|
||||||
|
allTags.add ("%");
|
||||||
|
}
|
||||||
|
|
||||||
|
// tags which signal us to start suppressing \n -> <br> encoding
|
||||||
|
// these are "structural" tags, for example, we don't want to add <br>s
|
||||||
|
// between a <table> and a <tr>.
|
||||||
|
static final HashSet suppressLinebreakTags = new HashSet ();
|
||||||
|
static {
|
||||||
|
suppressLinebreakTags.add ("table");
|
||||||
|
suppressLinebreakTags.add ("ul");
|
||||||
|
suppressLinebreakTags.add ("ol");
|
||||||
|
suppressLinebreakTags.add ("pre");
|
||||||
|
}
|
||||||
|
|
||||||
|
// tags which signal us to stop suppressing \n -> <br> encoding
|
||||||
|
// these usually signal transition from structural tags to normal
|
||||||
|
// HTML text, e.g. <td>
|
||||||
|
static final HashSet encodeLinebreakTags = new HashSet ();
|
||||||
|
static {
|
||||||
|
encodeLinebreakTags.add ("td");
|
||||||
|
encodeLinebreakTags.add ("th");
|
||||||
|
encodeLinebreakTags.add ("li");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -175,63 +294,98 @@ public final class HtmlEncoder {
|
||||||
if (str == null)
|
if (str == null)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int l = str.length();
|
char[] chars = str.toCharArray ();
|
||||||
|
int l = chars.length;
|
||||||
|
|
||||||
boolean closeTag=false, readTag=false, tagOpen=false;
|
// are we currently within a < and a >?
|
||||||
// the difference between swallowOneNewline and ignoreNewline is that swallowOneNewline is just effective once (for the next newline)
|
boolean insideTag=false;
|
||||||
|
// if we are inside a <code> tag, we encode everything to make
|
||||||
|
// documentation work easier
|
||||||
|
boolean insideCodeTag = false;
|
||||||
|
// the difference between swallowOneNewline and ignoreNewline is that
|
||||||
|
// swallowOneNewline is just effective once (for the next newline)
|
||||||
boolean ignoreNewline = false;
|
boolean ignoreNewline = false;
|
||||||
boolean swallowOneNewline = false;
|
boolean swallowOneNewline = false;
|
||||||
StringBuffer tag = new StringBuffer ();
|
|
||||||
|
|
||||||
for (int i=0; i<l; i++) {
|
for (int i=0; i<l; i++) {
|
||||||
char c = str.charAt (i);
|
char c = chars[i];
|
||||||
if (readTag) {
|
|
||||||
if (Character.isLetterOrDigit (c))
|
switch (c) {
|
||||||
tag.append (c);
|
case '&':
|
||||||
else if ('/' == c)
|
// check if this is an HTML entity already, in which case we pass it through unchanged
|
||||||
closeTag = true;
|
if (i < l-4 && !insideCodeTag) {
|
||||||
else {
|
// is this a numeric entity?
|
||||||
String t = tag.toString ();
|
if (chars[i+1] == '#' ) {
|
||||||
|
int j = i+2;
|
||||||
|
while (j<l && Character.isDigit (chars[j]))
|
||||||
|
j++;
|
||||||
|
if (j<l && chars[j] == ';') {
|
||||||
|
ret.append ("&");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int j = i+1;
|
||||||
|
while (j<l && Character.isLetterOrDigit (chars[j]))
|
||||||
|
j++;
|
||||||
|
if (j<l && chars[j] == ';') {
|
||||||
|
ret.append ("&");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// we didn't reach a break, so encode the ampersand as HTML entity
|
||||||
|
ret.append ("&amp;");
|
||||||
|
break;
|
||||||
|
case '<':
|
||||||
|
if (i < l-2) {
|
||||||
|
boolean insideCloseTag = ('/' == chars[i+1]);
|
||||||
|
int tagStart = insideCloseTag ? i+2 : i+1;
|
||||||
|
int j = tagStart;
|
||||||
|
while (j<l && (Character.isLetterOrDigit (chars[j]) || chars[j] == '%'))
|
||||||
|
j++;
|
||||||
|
if (j > tagStart && j < l) {
|
||||||
|
String tagName = new String (chars, tagStart, j-tagStart).toLowerCase ();
|
||||||
|
if ("code".equals (tagName) && insideCloseTag && insideCodeTag)
|
||||||
|
insideCodeTag = false;
|
||||||
|
if (allTags.contains (tagName) && !insideCodeTag) {
|
||||||
|
insideTag = true;
|
||||||
|
ret.append ('<');
|
||||||
// set ignoreNewline on some tags, depending on whether they're
|
// set ignoreNewline on some tags, depending on whether they're
|
||||||
// being opened or closed.
|
// being opened or closed.
|
||||||
// what's going on here? we switch newline encoding on inside some tags, for
|
// what's going on here? we switch newline encoding on inside some tags, for
|
||||||
// others we switch it on when they're closed
|
// others we switch it on when they're closed
|
||||||
if ("td".equalsIgnoreCase (t) || "th".equalsIgnoreCase (t) || "li".equalsIgnoreCase (t)) {
|
if (encodeLinebreakTags.contains (tagName)) {
|
||||||
ignoreNewline = closeTag;
|
ignoreNewline = insideCloseTag;
|
||||||
swallowOneNewline = true;
|
swallowOneNewline = true;
|
||||||
} else if ("table".equalsIgnoreCase (t) || "ul".equalsIgnoreCase (t) || "ol".equalsIgnoreCase (t) || "pre".equalsIgnoreCase (t)) {
|
} else if (suppressLinebreakTags.contains (tagName)) {
|
||||||
ignoreNewline = !closeTag;
|
ignoreNewline = !insideCloseTag;
|
||||||
swallowOneNewline = true;
|
swallowOneNewline = true;
|
||||||
} else if ("p".equalsIgnoreCase (t)) {
|
} else if ("p".equalsIgnoreCase (tagName) ||
|
||||||
|
"blockquote".equalsIgnoreCase (tagName) ||
|
||||||
|
"bq".equalsIgnoreCase (tagName)) {
|
||||||
swallowOneNewline = true;
|
swallowOneNewline = true;
|
||||||
}
|
}
|
||||||
|
if ("code".equals (tagName) && !insideCloseTag)
|
||||||
readTag = false;
|
insideCodeTag = true;
|
||||||
closeTag = false;
|
break;
|
||||||
tag.setLength (0);
|
|
||||||
}
|
}
|
||||||
} // if (readTag)
|
}
|
||||||
|
} // if (i < l-2)
|
||||||
switch (c) {
|
ret.append ("&lt;");
|
||||||
// case '&':
|
break;
|
||||||
// ret.append ("&");
|
|
||||||
// break;
|
|
||||||
case '\n':
|
case '\n':
|
||||||
ret.append ('\n');
|
ret.append ('\n');
|
||||||
if (!ignoreNewline && !swallowOneNewline)
|
if (!insideTag && !ignoreNewline && !swallowOneNewline)
|
||||||
ret.append ("<br />");
|
ret.append ("<br />");
|
||||||
if (!tagOpen)
|
if (!insideTag)
|
||||||
swallowOneNewline = false;
|
swallowOneNewline = false;
|
||||||
break;
|
break;
|
||||||
case '<':
|
|
||||||
closeTag = false;
|
|
||||||
readTag = true;
|
|
||||||
tagOpen = true;
|
|
||||||
ret.append ('<');
|
|
||||||
break;
|
|
||||||
case '>':
|
case '>':
|
||||||
tagOpen = false;
|
if (insideTag)
|
||||||
ret.append ('>');
|
ret.append ('>');
|
||||||
|
else
|
||||||
|
ret.append ("&gt;");
|
||||||
|
insideTag = false;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// ret.append (c);
|
// ret.append (c);
|
||||||
|
@ -244,7 +398,7 @@ public final class HtmlEncoder {
|
||||||
ret.append ((int) c);
|
ret.append ((int) c);
|
||||||
ret.append (";");
|
ret.append (";");
|
||||||
}
|
}
|
||||||
if (!tagOpen && !Character.isWhitespace (c))
|
if (!insideTag && !Character.isWhitespace (c))
|
||||||
swallowOneNewline = false;
|
swallowOneNewline = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue