Helma encode() functions now do entity encoding
again, but the right way. It transforms special Windows characters (such as smart quotes) to valid Unicode, uses symbolic entities for ISO-8859-1 characters and numeric entities for everything above.
This commit is contained in:
parent
5a589d4bd6
commit
c424e54261
1 changed files with 180 additions and 122 deletions
|
@ -17,109 +17,146 @@ import java.text.*;
|
|||
|
||||
public final class HtmlEncoder {
|
||||
|
||||
// transformation table for characters 128 to 255. These actually fall into two
|
||||
// groups, put together for efficiency: "Windows" characters 128-159 such as
|
||||
// "smart quotes", which are encoded to valid Unicode entities, and
|
||||
// valid ISO-8859 characters 160-255, which are encoded to the symbolic HTML
|
||||
// entity. Everything >= 256 is encoded to a numeric entity.
|
||||
//
|
||||
// for more on HTML entities see http://www.pemberley.com/janeinfo/latin1.html and
|
||||
// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
|
||||
//
|
||||
static final String[] transform = {
|
||||
"€", // 128
|
||||
"", // empty string means character is undefined in unicode
|
||||
"‚",
|
||||
"ƒ",
|
||||
"„",
|
||||
"…",
|
||||
"†",
|
||||
"‡",
|
||||
"ˆ",
|
||||
"‰",
|
||||
"Š",
|
||||
"‹",
|
||||
"Œ",
|
||||
"",
|
||||
"Ž",
|
||||
"",
|
||||
"",
|
||||
"‘",
|
||||
"’",
|
||||
"“",
|
||||
"”",
|
||||
"•",
|
||||
"–",
|
||||
"—",
|
||||
"˜",
|
||||
"™",
|
||||
"š",
|
||||
"›",
|
||||
"œ",
|
||||
"",
|
||||
"ž",
|
||||
"Ÿ", // 159
|
||||
" ", // 160
|
||||
"¡",
|
||||
"¢",
|
||||
"£",
|
||||
"¤",
|
||||
"¥",
|
||||
"¦",
|
||||
"§",
|
||||
"¨",
|
||||
"©",
|
||||
"ª",
|
||||
"«",
|
||||
"¬",
|
||||
"­",
|
||||
"®",
|
||||
"¯",
|
||||
"°",
|
||||
"±",
|
||||
"²",
|
||||
"³",
|
||||
"´",
|
||||
"µ",
|
||||
"¶",
|
||||
"·",
|
||||
"¸",
|
||||
"¹",
|
||||
"º",
|
||||
"»",
|
||||
"¼",
|
||||
"½",
|
||||
"¾",
|
||||
"¿",
|
||||
"À",
|
||||
"Á",
|
||||
"Â",
|
||||
"Ã",
|
||||
"Ä",
|
||||
"Å",
|
||||
"Æ",
|
||||
"Ç",
|
||||
"È",
|
||||
"É",
|
||||
"Ê",
|
||||
"Ë",
|
||||
"Ì",
|
||||
"Í",
|
||||
"Î",
|
||||
"Ï",
|
||||
"Ð",
|
||||
"Ñ",
|
||||
"Ò",
|
||||
"Ó",
|
||||
"Ô",
|
||||
"Õ",
|
||||
"Ö",
|
||||
"×",
|
||||
"Ø",
|
||||
"Ù",
|
||||
"Ú",
|
||||
"Û",
|
||||
"Ü",
|
||||
"Ý",
|
||||
"Þ",
|
||||
"ß",
|
||||
"à",
|
||||
"á",
|
||||
"â",
|
||||
"ã",
|
||||
"ä",
|
||||
"å",
|
||||
"æ",
|
||||
"ç",
|
||||
"è",
|
||||
"é",
|
||||
"ê",
|
||||
"ë",
|
||||
"ì",
|
||||
"í",
|
||||
"î",
|
||||
"ï",
|
||||
"ð",
|
||||
"ñ",
|
||||
"ò",
|
||||
"ó",
|
||||
"ô",
|
||||
"õ",
|
||||
"ö",
|
||||
"÷",
|
||||
"ø",
|
||||
"ù",
|
||||
"ú",
|
||||
"û",
|
||||
"ü",
|
||||
"ý",
|
||||
"þ",
|
||||
"ÿ" // 255
|
||||
};
|
||||
|
||||
/*
|
||||
static final Hashtable convertor = new Hashtable (128);
|
||||
|
||||
// conversion table
|
||||
static {
|
||||
convertor.put(new Integer(160), " ");
|
||||
convertor.put(new Integer(161), "¡");
|
||||
convertor.put(new Integer(162), "¢");
|
||||
convertor.put(new Integer(163), "£");
|
||||
convertor.put(new Integer(164), "¤");
|
||||
convertor.put(new Integer(165), "¥");
|
||||
convertor.put(new Integer(166), "¦");
|
||||
convertor.put(new Integer(167), "§");
|
||||
convertor.put(new Integer(168), "¨");
|
||||
convertor.put(new Integer(169), "©");
|
||||
convertor.put(new Integer(170), "ª");
|
||||
convertor.put(new Integer(171), "«");
|
||||
convertor.put(new Integer(172), "¬");
|
||||
convertor.put(new Integer(173), "­");
|
||||
convertor.put(new Integer(174), "®");
|
||||
convertor.put(new Integer(175), "¯");
|
||||
convertor.put(new Integer(176), "°");
|
||||
convertor.put(new Integer(177), "±");
|
||||
convertor.put(new Integer(178), "²");
|
||||
convertor.put(new Integer(179), "³");
|
||||
convertor.put(new Integer(180), "´");
|
||||
convertor.put(new Integer(181), "µ");
|
||||
convertor.put(new Integer(182), "¶");
|
||||
convertor.put(new Integer(183), "·");
|
||||
convertor.put(new Integer(184), "¸");
|
||||
convertor.put(new Integer(185), "¹");
|
||||
convertor.put(new Integer(186), "º");
|
||||
convertor.put(new Integer(187), "»");
|
||||
convertor.put(new Integer(188), "¼");
|
||||
convertor.put(new Integer(189), "½");
|
||||
convertor.put(new Integer(190), "¾");
|
||||
convertor.put(new Integer(191), "¿");
|
||||
convertor.put(new Integer(192), "À");
|
||||
convertor.put(new Integer(193), "Á");
|
||||
convertor.put(new Integer(194), "Â");
|
||||
convertor.put(new Integer(195), "Ã");
|
||||
convertor.put(new Integer(196), "Ä");
|
||||
convertor.put(new Integer(197), "Å");
|
||||
convertor.put(new Integer(198), "Æ");
|
||||
convertor.put(new Integer(199), "Ç");
|
||||
convertor.put(new Integer(200), "È");
|
||||
convertor.put(new Integer(201), "É");
|
||||
convertor.put(new Integer(202), "Ê");
|
||||
convertor.put(new Integer(203), "Ë");
|
||||
convertor.put(new Integer(204), "Ì");
|
||||
convertor.put(new Integer(205), "Í");
|
||||
convertor.put(new Integer(206), "Î");
|
||||
convertor.put(new Integer(207), "Ï");
|
||||
convertor.put(new Integer(208), "Ð");
|
||||
convertor.put(new Integer(209), "Ñ");
|
||||
convertor.put(new Integer(210), "Ò");
|
||||
convertor.put(new Integer(211), "Ó");
|
||||
convertor.put(new Integer(212), "Ô");
|
||||
convertor.put(new Integer(213), "Õ");
|
||||
convertor.put(new Integer(214), "Ö");
|
||||
convertor.put(new Integer(215), "×");
|
||||
convertor.put(new Integer(216), "Ø");
|
||||
convertor.put(new Integer(217), "Ù");
|
||||
convertor.put(new Integer(218), "Ú");
|
||||
convertor.put(new Integer(219), "Û");
|
||||
convertor.put(new Integer(220), "Ü");
|
||||
convertor.put(new Integer(221), "Ý");
|
||||
convertor.put(new Integer(222), "Þ");
|
||||
convertor.put(new Integer(223), "ß");
|
||||
convertor.put(new Integer(224), "à");
|
||||
convertor.put(new Integer(225), "á");
|
||||
convertor.put(new Integer(226), "â");
|
||||
convertor.put(new Integer(227), "ã");
|
||||
convertor.put(new Integer(228), "ä");
|
||||
convertor.put(new Integer(229), "å");
|
||||
convertor.put(new Integer(230), "æ");
|
||||
convertor.put(new Integer(231), "ç");
|
||||
convertor.put(new Integer(232), "è");
|
||||
convertor.put(new Integer(233), "é");
|
||||
convertor.put(new Integer(234), "ê");
|
||||
convertor.put(new Integer(235), "ë");
|
||||
convertor.put(new Integer(236), "ì");
|
||||
convertor.put(new Integer(237), "í");
|
||||
convertor.put(new Integer(238), "î");
|
||||
convertor.put(new Integer(239), "ï");
|
||||
convertor.put(new Integer(240), "ð");
|
||||
convertor.put(new Integer(241), "ñ");
|
||||
convertor.put(new Integer(242), "ò");
|
||||
convertor.put(new Integer(243), "ó");
|
||||
convertor.put(new Integer(244), "ô");
|
||||
convertor.put(new Integer(245), "õ");
|
||||
convertor.put(new Integer(246), "ö");
|
||||
convertor.put(new Integer(247), "÷");
|
||||
convertor.put(new Integer(248), "ø");
|
||||
convertor.put(new Integer(249), "ù");
|
||||
convertor.put(new Integer(250), "ú");
|
||||
convertor.put(new Integer(251), "û");
|
||||
convertor.put(new Integer(252), "ü");
|
||||
convertor.put(new Integer(253), "ý");
|
||||
convertor.put(new Integer(254), "þ");
|
||||
convertor.put(new Integer(255), "ÿ");
|
||||
} */
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -197,16 +234,16 @@ public final class HtmlEncoder {
|
|||
ret.append ('>');
|
||||
break;
|
||||
default:
|
||||
ret.append (c);
|
||||
// if (c < 160)
|
||||
// ret.append ((char) c);
|
||||
// else if (c >= 160 && c <= 255)
|
||||
// ret.append (convertor.get(new Integer(c)));
|
||||
// else {
|
||||
// ret.append ("&#");
|
||||
// ret.append (c);
|
||||
// ret.append (";");
|
||||
// }
|
||||
// ret.append (c);
|
||||
if (c < 128)
|
||||
ret.append (c);
|
||||
else if (c >= 128 && c < 256)
|
||||
ret.append (transform[c-128]);
|
||||
else {
|
||||
ret.append ("&#");
|
||||
ret.append ((int) c);
|
||||
ret.append (";");
|
||||
}
|
||||
if (!tagOpen && !Character.isWhitespace (c))
|
||||
swallowOneNewline = false;
|
||||
}
|
||||
|
@ -271,16 +308,16 @@ public final class HtmlEncoder {
|
|||
}
|
||||
break;
|
||||
default:
|
||||
ret.append (c);
|
||||
// if (c < 160)
|
||||
// ret.append ((char) c);
|
||||
// else if (c >= 160 && c <= 255)
|
||||
// ret.append (convertor.get(new Integer(c)));
|
||||
// else {
|
||||
// ret.append ("&#");
|
||||
// ret.append (c);
|
||||
// ret.append (";");
|
||||
// }
|
||||
// ret.append (c);
|
||||
if (c < 128)
|
||||
ret.append (c);
|
||||
else if (c >= 128 && c < 256)
|
||||
ret.append (transform[c-128]);
|
||||
else {
|
||||
ret.append ("&#");
|
||||
ret.append ((int) c);
|
||||
ret.append (";");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -315,5 +352,26 @@ public final class HtmlEncoder {
|
|||
}
|
||||
}
|
||||
|
||||
// test method
|
||||
public static String printCharRange (int from, int to) {
|
||||
StringBuffer response = new StringBuffer();
|
||||
for (int i=from;i<to;i++) {
|
||||
response.append (i);
|
||||
response.append (" ");
|
||||
response.append ((char) i);
|
||||
response.append (" ");
|
||||
if (i < 128)
|
||||
response.append ((char) i);
|
||||
else if (i >= 128 && i < 256)
|
||||
response.append (transform[i-128]);
|
||||
else {
|
||||
response.append ("&#");
|
||||
response.append (i);
|
||||
response.append (";");
|
||||
}
|
||||
response.append ("\r\n");
|
||||
}
|
||||
return response.toString();
|
||||
}
|
||||
|
||||
} // end of class
|
||||
|
|
Loading…
Add table
Reference in a new issue