Helma encode() functions now do entity encoding

again, but the right way. It transforms special Windows characters (such as smart quotes)
to valid Unicode, uses symbolic entities for ISO-8859-1 characters and numeric entities
for everything above.
This commit is contained in:
hns 2002-05-27 16:52:34 +00:00
parent 5a589d4bd6
commit c424e54261

View file

@ -17,109 +17,146 @@ import java.text.*;
public final class HtmlEncoder {
// Transformation table for characters 128 to 255, indexed by (character code - 128).
// The entries fall into two groups, kept in one array for efficiency:
//   - "Windows" (CP1252) characters 128-159 such as "smart quotes", which are
//     mapped to the numeric entity of their real Unicode code point, and
//   - valid ISO-8859-1 characters 160-255, which are mapped to the symbolic
//     HTML entity.
// Everything >= 256 is encoded to a numeric entity by the callers.
//
// Every entry is written as a pure-ASCII entity string. The table must never
// contain the literal character itself, otherwise the "encoded" output would
// still carry the raw non-ASCII character and depend on the source encoding.
//
// For more on HTML entities see http://www.pemberley.com/janeinfo/latin1.html and
// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
//
static final String[] transform = {
    "&#8364;",   // 128 euro sign
    "",          // 129 empty string means character is undefined in unicode
    "&#8218;",   // 130 single low-9 quotation mark
    "&#402;",    // 131 latin small letter f with hook
    "&#8222;",   // 132 double low-9 quotation mark
    "&#8230;",   // 133 horizontal ellipsis
    "&#8224;",   // 134 dagger
    "&#8225;",   // 135 double dagger
    "&#710;",    // 136 modifier letter circumflex accent
    "&#8240;",   // 137 per mille sign
    "&#352;",    // 138 latin capital letter S with caron
    "&#8249;",   // 139 single left-pointing angle quotation mark
    "&#338;",    // 140 latin capital ligature OE
    "",          // 141 undefined
    "&#381;",    // 142 latin capital letter Z with caron
    "",          // 143 undefined
    "",          // 144 undefined
    "&#8216;",   // 145 left single quotation mark
    "&#8217;",   // 146 right single quotation mark
    "&#8220;",   // 147 left double quotation mark
    "&#8221;",   // 148 right double quotation mark
    "&#8226;",   // 149 bullet
    "&#8211;",   // 150 en dash
    "&#8212;",   // 151 em dash
    "&#732;",    // 152 small tilde
    "&#8482;",   // 153 trade mark sign
    "&#353;",    // 154 latin small letter s with caron
    "&#8250;",   // 155 single right-pointing angle quotation mark
    "&#339;",    // 156 latin small ligature oe
    "",          // 157 undefined
    "&#382;",    // 158 latin small letter z with caron
    "&#376;",    // 159 latin capital letter Y with diaeresis
    "&nbsp;",    // 160
    "&iexcl;",   // 161
    "&cent;",    // 162
    "&pound;",   // 163
    "&curren;",  // 164
    "&yen;",     // 165
    "&brvbar;",  // 166
    "&sect;",    // 167
    "&uml;",     // 168
    "&copy;",    // 169
    "&ordf;",    // 170
    "&laquo;",   // 171
    "&not;",     // 172
    "&shy;",     // 173
    "&reg;",     // 174
    "&macr;",    // 175
    "&deg;",     // 176
    "&plusmn;",  // 177
    "&sup2;",    // 178
    "&sup3;",    // 179
    "&acute;",   // 180
    "&micro;",   // 181
    "&para;",    // 182
    "&middot;",  // 183
    "&cedil;",   // 184
    "&sup1;",    // 185
    "&ordm;",    // 186
    "&raquo;",   // 187
    "&frac14;",  // 188
    "&frac12;",  // 189
    "&frac34;",  // 190
    "&iquest;",  // 191
    "&Agrave;",  // 192
    "&Aacute;",  // 193
    "&Acirc;",   // 194
    "&Atilde;",  // 195
    "&Auml;",    // 196
    "&Aring;",   // 197
    "&AElig;",   // 198
    "&Ccedil;",  // 199
    "&Egrave;",  // 200
    "&Eacute;",  // 201
    "&Ecirc;",   // 202
    "&Euml;",    // 203
    "&Igrave;",  // 204
    "&Iacute;",  // 205
    "&Icirc;",   // 206
    "&Iuml;",    // 207
    "&ETH;",     // 208
    "&Ntilde;",  // 209
    "&Ograve;",  // 210
    "&Oacute;",  // 211
    "&Ocirc;",   // 212
    "&Otilde;",  // 213
    "&Ouml;",    // 214
    "&times;",   // 215
    "&Oslash;",  // 216
    "&Ugrave;",  // 217
    "&Uacute;",  // 218
    "&Ucirc;",   // 219
    "&Uuml;",    // 220
    "&Yacute;",  // 221
    "&THORN;",   // 222
    "&szlig;",   // 223
    "&agrave;",  // 224
    "&aacute;",  // 225
    "&acirc;",   // 226
    "&atilde;",  // 227
    "&auml;",    // 228
    "&aring;",   // 229
    "&aelig;",   // 230
    "&ccedil;",  // 231
    "&egrave;",  // 232
    "&eacute;",  // 233
    "&ecirc;",   // 234
    "&euml;",    // 235
    "&igrave;",  // 236
    "&iacute;",  // 237
    "&icirc;",   // 238
    "&iuml;",    // 239
    "&eth;",     // 240
    "&ntilde;",  // 241
    "&ograve;",  // 242
    "&oacute;",  // 243
    "&ocirc;",   // 244
    "&otilde;",  // 245
    "&ouml;",    // 246
    "&divide;",  // 247
    "&oslash;",  // 248
    "&ugrave;",  // 249
    "&uacute;",  // 250
    "&ucirc;",   // 251
    "&uuml;",    // 252
    "&yacute;",  // 253
    "&thorn;",   // 254
    "&yuml;"     // 255
};
/*
static final Hashtable convertor = new Hashtable (128);
// conversion table
static {
convertor.put(new Integer(160), " ");
convertor.put(new Integer(161), "¡");
convertor.put(new Integer(162), "¢");
convertor.put(new Integer(163), "£");
convertor.put(new Integer(164), "¤");
convertor.put(new Integer(165), "¥");
convertor.put(new Integer(166), "¦");
convertor.put(new Integer(167), "§");
convertor.put(new Integer(168), "¨");
convertor.put(new Integer(169), "©");
convertor.put(new Integer(170), "ª");
convertor.put(new Integer(171), "«");
convertor.put(new Integer(172), "¬");
convertor.put(new Integer(173), "­");
convertor.put(new Integer(174), "®");
convertor.put(new Integer(175), "¯");
convertor.put(new Integer(176), "°");
convertor.put(new Integer(177), "±");
convertor.put(new Integer(178), "²");
convertor.put(new Integer(179), "³");
convertor.put(new Integer(180), "´");
convertor.put(new Integer(181), "µ");
convertor.put(new Integer(182), "¶");
convertor.put(new Integer(183), "·");
convertor.put(new Integer(184), "¸");
convertor.put(new Integer(185), "¹");
convertor.put(new Integer(186), "º");
convertor.put(new Integer(187), "»");
convertor.put(new Integer(188), "¼");
convertor.put(new Integer(189), "½");
convertor.put(new Integer(190), "¾");
convertor.put(new Integer(191), "¿");
convertor.put(new Integer(192), "À");
convertor.put(new Integer(193), "Á");
convertor.put(new Integer(194), "Â");
convertor.put(new Integer(195), "Ã");
convertor.put(new Integer(196), "Ä");
convertor.put(new Integer(197), "Å");
convertor.put(new Integer(198), "Æ");
convertor.put(new Integer(199), "Ç");
convertor.put(new Integer(200), "È");
convertor.put(new Integer(201), "É");
convertor.put(new Integer(202), "Ê");
convertor.put(new Integer(203), "Ë");
convertor.put(new Integer(204), "Ì");
convertor.put(new Integer(205), "Í");
convertor.put(new Integer(206), "Î");
convertor.put(new Integer(207), "Ï");
convertor.put(new Integer(208), "Ð");
convertor.put(new Integer(209), "Ñ");
convertor.put(new Integer(210), "Ò");
convertor.put(new Integer(211), "Ó");
convertor.put(new Integer(212), "Ô");
convertor.put(new Integer(213), "Õ");
convertor.put(new Integer(214), "Ö");
convertor.put(new Integer(215), "×");
convertor.put(new Integer(216), "Ø");
convertor.put(new Integer(217), "Ù");
convertor.put(new Integer(218), "Ú");
convertor.put(new Integer(219), "Û");
convertor.put(new Integer(220), "Ü");
convertor.put(new Integer(221), "Ý");
convertor.put(new Integer(222), "Þ");
convertor.put(new Integer(223), "ß");
convertor.put(new Integer(224), "à");
convertor.put(new Integer(225), "á");
convertor.put(new Integer(226), "â");
convertor.put(new Integer(227), "ã");
convertor.put(new Integer(228), "ä");
convertor.put(new Integer(229), "å");
convertor.put(new Integer(230), "æ");
convertor.put(new Integer(231), "ç");
convertor.put(new Integer(232), "è");
convertor.put(new Integer(233), "é");
convertor.put(new Integer(234), "ê");
convertor.put(new Integer(235), "ë");
convertor.put(new Integer(236), "ì");
convertor.put(new Integer(237), "í");
convertor.put(new Integer(238), "î");
convertor.put(new Integer(239), "ï");
convertor.put(new Integer(240), "ð");
convertor.put(new Integer(241), "ñ");
convertor.put(new Integer(242), "ò");
convertor.put(new Integer(243), "ó");
convertor.put(new Integer(244), "ô");
convertor.put(new Integer(245), "õ");
convertor.put(new Integer(246), "ö");
convertor.put(new Integer(247), "÷");
convertor.put(new Integer(248), "ø");
convertor.put(new Integer(249), "ù");
convertor.put(new Integer(250), "ú");
convertor.put(new Integer(251), "û");
convertor.put(new Integer(252), "ü");
convertor.put(new Integer(253), "ý");
convertor.put(new Integer(254), "þ");
convertor.put(new Integer(255), "ÿ");
} */
/**
*
@ -197,16 +234,16 @@ public final class HtmlEncoder {
ret.append ('>');
break;
default:
ret.append (c);
// if (c < 160)
// ret.append ((char) c);
// else if (c >= 160 && c <= 255)
// ret.append (convertor.get(new Integer(c)));
// else {
// ret.append ("&#");
// ret.append (c);
// ret.append (";");
// }
if (c < 128)
ret.append (c);
else if (c >= 128 && c < 256)
ret.append (transform[c-128]);
else {
ret.append ("&#");
ret.append ((int) c);
ret.append (";");
}
if (!tagOpen && !Character.isWhitespace (c))
swallowOneNewline = false;
}
@ -271,16 +308,16 @@ public final class HtmlEncoder {
}
break;
default:
ret.append (c);
// if (c < 160)
// ret.append ((char) c);
// else if (c >= 160 && c <= 255)
// ret.append (convertor.get(new Integer(c)));
// else {
// ret.append ("&#");
// ret.append (c);
// ret.append (";");
// }
if (c < 128)
ret.append (c);
else if (c >= 128 && c < 256)
ret.append (transform[c-128]);
else {
ret.append ("&#");
ret.append ((int) c);
ret.append (";");
}
}
}
}
@ -315,5 +352,26 @@ public final class HtmlEncoder {
}
}
/**
 * Test method: renders one line per character code in the half-open range
 * from (inclusive) to to (exclusive). Each line holds the decimal code, the
 * raw character, and the form the encoder would emit for it, separated by
 * single spaces and terminated by CR LF.
 *
 * @param from first character code to print (inclusive)
 * @param to   end of the range (exclusive)
 * @return the assembled listing; empty if from is not smaller than to
 */
public static String printCharRange (int from, int to) {
    StringBuffer out = new StringBuffer();
    int code = from;
    while (code < to) {
        // decimal code and raw character
        out.append (code).append (" ").append ((char) code).append (" ");
        // encoded form, using the same three ranges as the encode methods
        if (code >= 256) {
            out.append ("&#").append (code).append (";");
        } else if (code >= 128) {
            out.append (transform[code-128]);
        } else {
            out.append ((char) code);
        }
        out.append ("\r\n");
        code++;
    }
    return out.toString();
}
} // end of class