diff --git a/src/helma/util/HtmlEncoder.java b/src/helma/util/HtmlEncoder.java
index 443f0095..d9b23fa9 100644
--- a/src/helma/util/HtmlEncoder.java
+++ b/src/helma/util/HtmlEncoder.java
@@ -17,109 +17,146 @@ import java.text.*;
public final class HtmlEncoder {
+ // Transformation table for characters 128 to 255. These actually fall into two
+ // groups, put together for efficiency: "Windows" characters 128-159 such as
+ // "smart quotes", which are encoded to valid Unicode entities, and
+ // valid ISO-8859 characters 160-255, which are encoded to the symbolic HTML
+ // entity. Everything >= 256 is encoded to a numeric entity.
+ //
+ // For more on HTML entities see http://www.pemberley.com/janeinfo/latin1.html and
+ // ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
+ // Index with (c - 128); "" marks a code point that CP1252 leaves undefined.
+ static final String[] transform = {
+ "&#8364;", // 128 euro sign
+ "", // 129 empty string means character is undefined in CP1252
+ "&#8218;", // 130 single low-9 quotation mark
+ "&#402;", // 131 latin small letter f with hook
+ "&#8222;", // 132 double low-9 quotation mark
+ "&#8230;", // 133 horizontal ellipsis
+ "&#8224;", // 134 dagger
+ "&#8225;", // 135 double dagger
+ "&#710;", // 136 modifier letter circumflex accent
+ "&#8240;", // 137 per mille sign
+ "&#352;", // 138 latin capital letter S with caron
+ "&#8249;", // 139 single left-pointing angle quotation mark
+ "&#338;", // 140 latin capital ligature OE
+ "", // 141 undefined
+ "&#381;", // 142 latin capital letter Z with caron
+ "", // 143 undefined
+ "", // 144 undefined
+ "&#8216;", // 145 left single quotation mark
+ "&#8217;", // 146 right single quotation mark
+ "&#8220;", // 147 left double quotation mark
+ "&#8221;", // 148 right double quotation mark
+ "&#8226;", // 149 bullet
+ "&#8211;", // 150 en dash
+ "&#8212;", // 151 em dash
+ "&#732;", // 152 small tilde
+ "&#8482;", // 153 trade mark sign
+ "&#353;", // 154 latin small letter s with caron
+ "&#8250;", // 155 single right-pointing angle quotation mark
+ "&#339;", // 156 latin small ligature oe
+ "", // 157 undefined
+ "&#382;", // 158 latin small letter z with caron
+ "&#376;", // 159 latin capital letter Y with diaeresis
+ "&nbsp;", // 160
+ "&iexcl;", // 161
+ "&cent;", // 162
+ "&pound;", // 163
+ "&curren;", // 164
+ "&yen;", // 165
+ "&brvbar;", // 166
+ "&sect;", // 167
+ "&uml;", // 168
+ "&copy;", // 169
+ "&ordf;", // 170
+ "&laquo;", // 171
+ "&not;", // 172
+ "&shy;", // 173
+ "&reg;", // 174
+ "&macr;", // 175
+ "&deg;", // 176
+ "&plusmn;", // 177
+ "&sup2;", // 178
+ "&sup3;", // 179
+ "&acute;", // 180
+ "&micro;", // 181
+ "&para;", // 182
+ "&middot;", // 183
+ "&cedil;", // 184
+ "&sup1;", // 185
+ "&ordm;", // 186
+ "&raquo;", // 187
+ "&frac14;", // 188
+ "&frac12;", // 189
+ "&frac34;", // 190
+ "&iquest;", // 191
+ "&Agrave;", // 192
+ "&Aacute;", // 193
+ "&Acirc;", // 194
+ "&Atilde;", // 195
+ "&Auml;", // 196
+ "&Aring;", // 197
+ "&AElig;", // 198
+ "&Ccedil;", // 199
+ "&Egrave;", // 200
+ "&Eacute;", // 201
+ "&Ecirc;", // 202
+ "&Euml;", // 203
+ "&Igrave;", // 204
+ "&Iacute;", // 205
+ "&Icirc;", // 206
+ "&Iuml;", // 207
+ "&ETH;", // 208
+ "&Ntilde;", // 209
+ "&Ograve;", // 210
+ "&Oacute;", // 211
+ "&Ocirc;", // 212
+ "&Otilde;", // 213
+ "&Ouml;", // 214
+ "&times;", // 215
+ "&Oslash;", // 216
+ "&Ugrave;", // 217
+ "&Uacute;", // 218
+ "&Ucirc;", // 219
+ "&Uuml;", // 220
+ "&Yacute;", // 221
+ "&THORN;", // 222
+ "&szlig;", // 223
+ "&agrave;", // 224
+ "&aacute;", // 225
+ "&acirc;", // 226
+ "&atilde;", // 227
+ "&auml;", // 228
+ "&aring;", // 229
+ "&aelig;", // 230
+ "&ccedil;", // 231
+ "&egrave;", // 232
+ "&eacute;", // 233
+ "&ecirc;", // 234
+ "&euml;", // 235
+ "&igrave;", // 236
+ "&iacute;", // 237
+ "&icirc;", // 238
+ "&iuml;", // 239
+ "&eth;", // 240
+ "&ntilde;", // 241
+ "&ograve;", // 242
+ "&oacute;", // 243
+ "&ocirc;", // 244
+ "&otilde;", // 245
+ "&ouml;", // 246
+ "&divide;", // 247
+ "&oslash;", // 248
+ "&ugrave;", // 249
+ "&uacute;", // 250
+ "&ucirc;", // 251
+ "&uuml;", // 252
+ "&yacute;", // 253
+ "&thorn;", // 254
+ "&yuml;" // 255
+ };
- /*
- static final Hashtable convertor = new Hashtable (128);
-
- // conversion table
- static {
- convertor.put(new Integer(160), "&nbsp;");
- convertor.put(new Integer(161), "&iexcl;");
- convertor.put(new Integer(162), "&cent;");
- convertor.put(new Integer(163), "&pound;");
- convertor.put(new Integer(164), "&curren;");
- convertor.put(new Integer(165), "&yen;");
- convertor.put(new Integer(166), "&brvbar;");
- convertor.put(new Integer(167), "&sect;");
- convertor.put(new Integer(168), "&uml;");
- convertor.put(new Integer(169), "&copy;");
- convertor.put(new Integer(170), "&ordf;");
- convertor.put(new Integer(171), "&laquo;");
- convertor.put(new Integer(172), "&not;");
- convertor.put(new Integer(173), "&shy;");
- convertor.put(new Integer(174), "&reg;");
- convertor.put(new Integer(175), "&macr;");
- convertor.put(new Integer(176), "&deg;");
- convertor.put(new Integer(177), "&plusmn;");
- convertor.put(new Integer(178), "&sup2;");
- convertor.put(new Integer(179), "&sup3;");
- convertor.put(new Integer(180), "&acute;");
- convertor.put(new Integer(181), "&micro;");
- convertor.put(new Integer(182), "&para;");
- convertor.put(new Integer(183), "&middot;");
- convertor.put(new Integer(184), "&cedil;");
- convertor.put(new Integer(185), "&sup1;");
- convertor.put(new Integer(186), "&ordm;");
- convertor.put(new Integer(187), "&raquo;");
- convertor.put(new Integer(188), "&frac14;");
- convertor.put(new Integer(189), "&frac12;");
- convertor.put(new Integer(190), "&frac34;");
- convertor.put(new Integer(191), "&iquest;");
- convertor.put(new Integer(192), "&Agrave;");
- convertor.put(new Integer(193), "&Aacute;");
- convertor.put(new Integer(194), "&Acirc;");
- convertor.put(new Integer(195), "&Atilde;");
- convertor.put(new Integer(196), "&Auml;");
- convertor.put(new Integer(197), "&Aring;");
- convertor.put(new Integer(198), "&AElig;");
- convertor.put(new Integer(199), "&Ccedil;");
- convertor.put(new Integer(200), "&Egrave;");
- convertor.put(new Integer(201), "&Eacute;");
- convertor.put(new Integer(202), "&Ecirc;");
- convertor.put(new Integer(203), "&Euml;");
- convertor.put(new Integer(204), "&Igrave;");
- convertor.put(new Integer(205), "&Iacute;");
- convertor.put(new Integer(206), "&Icirc;");
- convertor.put(new Integer(207), "&Iuml;");
- convertor.put(new Integer(208), "&ETH;");
- convertor.put(new Integer(209), "&Ntilde;");
- convertor.put(new Integer(210), "&Ograve;");
- convertor.put(new Integer(211), "&Oacute;");
- convertor.put(new Integer(212), "&Ocirc;");
- convertor.put(new Integer(213), "&Otilde;");
- convertor.put(new Integer(214), "&Ouml;");
- convertor.put(new Integer(215), "&times;");
- convertor.put(new Integer(216), "&Oslash;");
- convertor.put(new Integer(217), "&Ugrave;");
- convertor.put(new Integer(218), "&Uacute;");
- convertor.put(new Integer(219), "&Ucirc;");
- convertor.put(new Integer(220), "&Uuml;");
- convertor.put(new Integer(221), "&Yacute;");
- convertor.put(new Integer(222), "&THORN;");
- convertor.put(new Integer(223), "&szlig;");
- convertor.put(new Integer(224), "&agrave;");
- convertor.put(new Integer(225), "&aacute;");
- convertor.put(new Integer(226), "&acirc;");
- convertor.put(new Integer(227), "&atilde;");
- convertor.put(new Integer(228), "&auml;");
- convertor.put(new Integer(229), "&aring;");
- convertor.put(new Integer(230), "&aelig;");
- convertor.put(new Integer(231), "&ccedil;");
- convertor.put(new Integer(232), "&egrave;");
- convertor.put(new Integer(233), "&eacute;");
- convertor.put(new Integer(234), "&ecirc;");
- convertor.put(new Integer(235), "&euml;");
- convertor.put(new Integer(236), "&igrave;");
- convertor.put(new Integer(237), "&iacute;");
- convertor.put(new Integer(238), "&icirc;");
- convertor.put(new Integer(239), "&iuml;");
- convertor.put(new Integer(240), "&eth;");
- convertor.put(new Integer(241), "&ntilde;");
- convertor.put(new Integer(242), "&ograve;");
- convertor.put(new Integer(243), "&oacute;");
- convertor.put(new Integer(244), "&ocirc;");
- convertor.put(new Integer(245), "&otilde;");
- convertor.put(new Integer(246), "&ouml;");
- convertor.put(new Integer(247), "&divide;");
- convertor.put(new Integer(248), "&oslash;");
- convertor.put(new Integer(249), "&ugrave;");
- convertor.put(new Integer(250), "&uacute;");
- convertor.put(new Integer(251), "&ucirc;");
- convertor.put(new Integer(252), "&uuml;");
- convertor.put(new Integer(253), "&yacute;");
- convertor.put(new Integer(254), "&thorn;");
- convertor.put(new Integer(255), "&yuml;");
- } */
/**
*
@@ -197,16 +234,16 @@ public final class HtmlEncoder {
ret.append ('>');
break;
default:
- ret.append (c);
- // if (c < 160)
- // ret.append ((char) c);
- // else if (c >= 160 && c <= 255)
- // ret.append (convertor.get(new Integer(c)));
- // else {
- // ret.append ("&#");
- // ret.append (c);
- // ret.append (";");
- // }
+ // ASCII passes through; 128-255 go through the transform table; the rest become numeric entities
+ if (c < 128)
+ ret.append (c);
+ else if (c < 256)
+ ret.append (transform[c-128]);
+ else {
+ ret.append ("&#"); // opens a numeric character reference, closed by ";" below
+ ret.append ((int) c);
+ ret.append (";");
+ }
if (!tagOpen && !Character.isWhitespace (c))
swallowOneNewline = false;
}
@@ -271,16 +308,16 @@ public final class HtmlEncoder {
}
break;
default:
- ret.append (c);
- // if (c < 160)
- // ret.append ((char) c);
- // else if (c >= 160 && c <= 255)
- // ret.append (convertor.get(new Integer(c)));
- // else {
- // ret.append ("&#");
- // ret.append (c);
- // ret.append (";");
- // }
+ // ASCII passes through; 128-255 go through the transform table; the rest become numeric entities
+ if (c < 128)
+ ret.append (c);
+ else if (c < 256)
+ ret.append (transform[c-128]);
+ else {
+ ret.append ("&#"); // opens a numeric character reference, closed by ";" below
+ ret.append ((int) c);
+ ret.append (";");
+ }
}
}
}
@@ -315,5 +352,26 @@ public final class HtmlEncoder {
}
}
+ // test method: lists code points from..to, one per CRLF-terminated line, as number, raw char and encoded form
+ public static String printCharRange (int from, int to) {
+ StringBuffer response = new StringBuffer();
+ for (int i=from;i<to;i++) { // NOTE(review): loop header and the four column appends below were rebuilt from a garbled span - confirm bound and separators
+ response.append (i);
+ response.append ("      ");
+ response.append ((char) i);
+ response.append ("      ");
+ if (i < 128)
+ response.append ((char) i); // plain ASCII is emitted unchanged
+ else if (i < 256)
+ response.append (transform[i-128]); // table lookup for 128-255
+ else {
+ response.append ("&#"); // numeric character reference for everything >= 256
+ response.append (i);
+ response.append (";");
+ }
+ response.append ("\r\n");
+ }
+ return response.toString();
+ }
} // end of class