diff --git a/src/helma/util/HtmlEncoder.java b/src/helma/util/HtmlEncoder.java index 443f0095..d9b23fa9 100644 --- a/src/helma/util/HtmlEncoder.java +++ b/src/helma/util/HtmlEncoder.java @@ -17,109 +17,146 @@ import java.text.*; public final class HtmlEncoder { + // transformation table for characters 128 to 255. These actually fall into two + // groups, put together for efficiency: "Windows" characters 128-159 such as + // "smart quotes", which are encoded to valid Unicode entities, and + // valid ISO-8859 characters 160-255, which are encoded to the symbolic HTML + // entity. Everything >= 256 is encoded to a numeric entity. + // + // for more on HTML entities see http://www.pemberley.com/janeinfo/latin1.html and + // ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT + // the table is indexed by (c - 128); an empty entry makes the encoder emit nothing for that character. + static final String[] transform = { + "&#8364;", // 128 + "", // empty string means character is undefined in unicode + "&#8218;", + "&#402;", + "&#8222;", + "&#8230;", + "&#8224;", + "&#8225;", + "&#710;", + "&#8240;", + "&#352;", + "&#8249;", + "&#338;", + "", + "&#381;", + "", + "", + "&#8216;", + "&#8217;", + "&#8220;", + "&#8221;", + "&#8226;", + "&#8211;", + "&#8212;", + "&#732;", + "&#8482;", + "&#353;", + "&#8250;", + "&#339;", + "", + "&#382;", + "&#376;", // 159 + "&nbsp;", // 160 + "&iexcl;", + "&cent;", + "&pound;", + "&curren;", + "&yen;", + "&brvbar;", + "&sect;", + "&uml;", + "&copy;", + "&ordf;", + "&laquo;", + "&not;", + "&shy;", + "&reg;", + "&macr;", + "&deg;", + "&plusmn;", + "&sup2;", + "&sup3;", + "&acute;", + "&micro;", + "&para;", + "&middot;", + "&cedil;", + "&sup1;", + "&ordm;", + "&raquo;", + "&frac14;", + "&frac12;", + "&frac34;", + "&iquest;", + "&Agrave;", + "&Aacute;", + "&Acirc;", + "&Atilde;", + "&Auml;", + "&Aring;", + "&AElig;", + "&Ccedil;", + "&Egrave;", + "&Eacute;", + "&Ecirc;", + "&Euml;", + "&Igrave;", + "&Iacute;", + "&Icirc;", + "&Iuml;", + "&ETH;", + "&Ntilde;", + "&Ograve;", + "&Oacute;", + "&Ocirc;", + "&Otilde;", + "&Ouml;", + "&times;", + "&Oslash;", + "&Ugrave;", + "&Uacute;", + "&Ucirc;", + "&Uuml;", + "&Yacute;", + "&THORN;", + "&szlig;", + "&agrave;", + "&aacute;", + "&acirc;", + "&atilde;", + "&auml;", + "&aring;", + "&aelig;", + "&ccedil;", + "&egrave;", + "&eacute;", + "&ecirc;", + "&euml;", + "&igrave;", + "&iacute;", + "&icirc;", + "&iuml;", + "&eth;", + "&ntilde;", + "&ograve;", + "&oacute;", + "&ocirc;", + "&otilde;", + "&ouml;", + "&divide;", + "&oslash;", + "&ugrave;", + "&uacute;", + "&ucirc;", + "&uuml;", + "&yacute;", + "&thorn;", + "&yuml;" // 255 + }; - /* - static final Hashtable convertor = new Hashtable (128); - - // conversion table - static { - convertor.put(new Integer(160), " "); - convertor.put(new Integer(161), "¡"); 
- convertor.put(new Integer(162), "¢"); - convertor.put(new Integer(163), "£"); - convertor.put(new Integer(164), "¤"); - convertor.put(new Integer(165), "¥"); - convertor.put(new Integer(166), "¦"); - convertor.put(new Integer(167), "§"); - convertor.put(new Integer(168), "¨"); - convertor.put(new Integer(169), "©"); - convertor.put(new Integer(170), "ª"); - convertor.put(new Integer(171), "«"); - convertor.put(new Integer(172), "¬"); - convertor.put(new Integer(173), "­"); - convertor.put(new Integer(174), "®"); - convertor.put(new Integer(175), "¯"); - convertor.put(new Integer(176), "°"); - convertor.put(new Integer(177), "±"); - convertor.put(new Integer(178), "²"); - convertor.put(new Integer(179), "³"); - convertor.put(new Integer(180), "´"); - convertor.put(new Integer(181), "µ"); - convertor.put(new Integer(182), "¶"); - convertor.put(new Integer(183), "·"); - convertor.put(new Integer(184), "¸"); - convertor.put(new Integer(185), "¹"); - convertor.put(new Integer(186), "º"); - convertor.put(new Integer(187), "»"); - convertor.put(new Integer(188), "¼"); - convertor.put(new Integer(189), "½"); - convertor.put(new Integer(190), "¾"); - convertor.put(new Integer(191), "¿"); - convertor.put(new Integer(192), "À"); - convertor.put(new Integer(193), "Á"); - convertor.put(new Integer(194), "Â"); - convertor.put(new Integer(195), "Ã"); - convertor.put(new Integer(196), "Ä"); - convertor.put(new Integer(197), "Å"); - convertor.put(new Integer(198), "Æ"); - convertor.put(new Integer(199), "Ç"); - convertor.put(new Integer(200), "È"); - convertor.put(new Integer(201), "É"); - convertor.put(new Integer(202), "Ê"); - convertor.put(new Integer(203), "Ë"); - convertor.put(new Integer(204), "Ì"); - convertor.put(new Integer(205), "Í"); - convertor.put(new Integer(206), "Î"); - convertor.put(new Integer(207), "Ï"); - convertor.put(new Integer(208), "Ð"); - convertor.put(new Integer(209), "Ñ"); - convertor.put(new Integer(210), "Ò"); - convertor.put(new Integer(211), "Ó"); 
- convertor.put(new Integer(212), "Ô"); - convertor.put(new Integer(213), "Õ"); - convertor.put(new Integer(214), "Ö"); - convertor.put(new Integer(215), "×"); - convertor.put(new Integer(216), "Ø"); - convertor.put(new Integer(217), "Ù"); - convertor.put(new Integer(218), "Ú"); - convertor.put(new Integer(219), "Û"); - convertor.put(new Integer(220), "Ü"); - convertor.put(new Integer(221), "Ý"); - convertor.put(new Integer(222), "Þ"); - convertor.put(new Integer(223), "ß"); - convertor.put(new Integer(224), "à"); - convertor.put(new Integer(225), "á"); - convertor.put(new Integer(226), "â"); - convertor.put(new Integer(227), "ã"); - convertor.put(new Integer(228), "ä"); - convertor.put(new Integer(229), "å"); - convertor.put(new Integer(230), "æ"); - convertor.put(new Integer(231), "ç"); - convertor.put(new Integer(232), "è"); - convertor.put(new Integer(233), "é"); - convertor.put(new Integer(234), "ê"); - convertor.put(new Integer(235), "ë"); - convertor.put(new Integer(236), "ì"); - convertor.put(new Integer(237), "í"); - convertor.put(new Integer(238), "î"); - convertor.put(new Integer(239), "ï"); - convertor.put(new Integer(240), "ð"); - convertor.put(new Integer(241), "ñ"); - convertor.put(new Integer(242), "ò"); - convertor.put(new Integer(243), "ó"); - convertor.put(new Integer(244), "ô"); - convertor.put(new Integer(245), "õ"); - convertor.put(new Integer(246), "ö"); - convertor.put(new Integer(247), "÷"); - convertor.put(new Integer(248), "ø"); - convertor.put(new Integer(249), "ù"); - convertor.put(new Integer(250), "ú"); - convertor.put(new Integer(251), "û"); - convertor.put(new Integer(252), "ü"); - convertor.put(new Integer(253), "ý"); - convertor.put(new Integer(254), "þ"); - convertor.put(new Integer(255), "ÿ"); - } */ /** * @@ -197,16 +234,16 @@ public final class HtmlEncoder { ret.append ('>'); break; default: - ret.append (c); - // if (c < 160) - // ret.append ((char) c); - // else if (c >= 160 && c <= 255) - // ret.append (convertor.get(new 
Integer(c))); - // else { - // ret.append ("&#"); - // ret.append (c); - // ret.append (";"); - // } + // ret.append (c); + if (c < 128) + ret.append (c); + else if (c >= 128 && c < 256) + ret.append (transform[c-128]); + else { + ret.append ("&#"); + ret.append ((int) c); + ret.append (";"); + } if (!tagOpen && !Character.isWhitespace (c)) swallowOneNewline = false; } @@ -271,16 +308,16 @@ public final class HtmlEncoder { } break; default: - ret.append (c); - // if (c < 160) - // ret.append ((char) c); - // else if (c >= 160 && c <= 255) - // ret.append (convertor.get(new Integer(c))); - // else { - // ret.append ("&#"); - // ret.append (c); - // ret.append (";"); - // } + // ret.append (c); + if (c < 128) + ret.append (c); + else if (c >= 128 && c < 256) + ret.append (transform[c-128]); + else { + ret.append ("&#"); + ret.append ((int) c); + ret.append (";"); + } } } } @@ -315,5 +352,26 @@ public final class HtmlEncoder { } } + // test method: for each code point in [from, to) appends one CRLF-terminated row holding the number, the raw character and its encoded form. NOTE(review): the loop head and the row-prefix appends were restored after text loss in this patch - confirm against upstream. + public static String printCharRange (int from, int to) { + StringBuffer response = new StringBuffer(); + for (int i=from;i<to;i++) { + response.append (i); + response.append ("      "); + response.append ((char) i); + response.append ("      "); + if (i < 128) + response.append ((char) i); + else if (i >= 128 && i < 256) + response.append (transform[i-128]); + else { + response.append ("&#"); + response.append (i); + response.append (";"); + } + response.append ("\r\n"); + } + return response.toString(); + } } // end of class