Reworked formatting code

Implemented paragraph formatting. Fixed bug with newlines in pre tag.
2003-09-19 16:12:01 +00:00 · 2003-09-19 16:12:01 +00:00 · 253ca75822
commit 253ca75822
parent 2015aeef89
1 changed files with 132 additions and 75 deletions
--- a/src/helma/util/HtmlEncoder.java
+++ b/src/helma/util/HtmlEncoder.java
@ -278,49 +278,53 @@ public final class HtmlEncoder {
    // conversion around them to look good. However, they differ
    // in how many newlines around them should be ignored. These sets
    // help to treat each tag right in newline conversion.
-    static final HashSet swallowAll = new HashSet();
+    static final HashSet internalTags = new HashSet();
-    static final HashSet swallowTwo = new HashSet();
+    static final HashSet blockTags = new HashSet();
-    static final HashSet swallowOne = new HashSet();
+    static final HashSet semiBlockTags = new HashSet();
    static {
        // actual block level elements
-        swallowOne.add("address");
+        semiBlockTags.add("address");
-        swallowTwo.add("blockquote");
+        semiBlockTags.add("dir");
-        swallowTwo.add("center");
+        semiBlockTags.add("div");
-        swallowOne.add("dir");
+        semiBlockTags.add("table");
-        swallowOne.add("div");
+
-        swallowTwo.add("dl");
+        blockTags.add("blockquote");
-        swallowTwo.add("fieldset");
+        blockTags.add("center");
-        swallowTwo.add("form");
+        blockTags.add("dl");
-        swallowTwo.add("h1");
+        blockTags.add("fieldset");
-        swallowTwo.add("h2");
+        blockTags.add("form");
-        swallowTwo.add("h3");
+        blockTags.add("h1");
-        swallowTwo.add("h4");
+        blockTags.add("h2");
-        swallowTwo.add("h5");
+        blockTags.add("h3");
-        swallowTwo.add("h6");
+        blockTags.add("h4");
-        swallowTwo.add("hr");
+        blockTags.add("h5");
-        swallowTwo.add("isindex");
+        blockTags.add("h6");
-        swallowAll.add("menu");
+        blockTags.add("hr");
-        swallowAll.add("noframes");
+        blockTags.add("isindex");
-        swallowAll.add("noscript");
+        blockTags.add("ol");
-        swallowTwo.add("ol");
+        blockTags.add("p");
-        swallowTwo.add("p");
+        blockTags.add("pre");
-        swallowTwo.add("pre");
+        blockTags.add("ul");
-        swallowOne.add("table");
+
-        swallowTwo.add("ul");
+        internalTags.add("menu");
        internalTags.add("noframes");
        internalTags.add("noscript");
        /// to be treated as block level elements
-        swallowTwo.add("br");
+        semiBlockTags.add("th");
-        swallowTwo.add("dd");
+
-        swallowTwo.add("dt");
+        blockTags.add("br");
-        swallowTwo.add("frameset");
+        blockTags.add("dd");
-        swallowTwo.add("li");
+        blockTags.add("dt");
-        swallowAll.add("tbody");
+        blockTags.add("frameset");
-        swallowTwo.add("td");
+        blockTags.add("li");
-        swallowAll.add("tfoot");
+        blockTags.add("td");
-        swallowOne.add("th");
+
-        swallowAll.add("thead");
+        internalTags.add("tbody");
-        swallowAll.add("tr");
+        internalTags.add("tfoot");
        internalTags.add("thead");
        internalTags.add("tr");
    }
    // set of tags that are always empty
@ -347,8 +351,12 @@ public final class HtmlEncoder {
    static final byte TAG_ATT_NAME = 2;
    static final byte TAG_ATT_VAL = 3;
-    static final String newLine = System.getProperty("line.separator");
+    static final byte TEXT = 0;
    static final byte SEMIBLOCK = 1;
    static final byte BLOCK = 2;
    static final byte INTERNAL = 3;
    static final String newLine = System.getProperty("line.separator");
    /**
     *  Do "smart" encoding on a string. This means that valid HTML entities and tags,
@ -369,7 +377,7 @@ public final class HtmlEncoder {
        // try to make stringbuffer large enough from the start
        StringBuffer ret = new StringBuffer(Math.round(l * 1.4f));
-        encode(str, ret, null);
+        encode(str, ret, false, null);
        return ret.toString();
    }
@ -380,21 +388,39 @@ public final class HtmlEncoder {
     *  other occurrences of '<', '>' and '&' are encoded to HTML entities.
     */
    public final static void encode(String str, StringBuffer ret) {
-        encode(str, ret, null);
+        encode(str, ret, false, null);
    }
    /**
     *  Do "smart" encoding on a string. This means that valid HTML entities and tags,
     *  Helma macros and HTML comments are passed through unescaped, while
     *  other occurrences of '<', '>' and '&' are encoded to HTML entities.
     *
     *  @param str the string to encode
     *  @param ret the string buffer to encode to
     *  @param paragraphs if true use p tags for paragraphs, otherwise just use br's
     *  @param allowedTags a set containing the names of allowed tags as strings. All other
     *                     tags will be escaped
     */
-    public final static void encode(String str, StringBuffer ret, Set allowedTags) {
+    public final static void encode(String str, StringBuffer ret,
                                    boolean paragraphs, Set allowedTags) {
        if (str == null) {
            return;
        }
        int l = str.length();
        // where to insert the <p> tag in case we want to create a paragraph later on
        int paragraphStart = ret.length();
        // what kind of element/text are we leaving and entering?
        // this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL
        // depending on this information, we decide whether and how to insert
        // paragraphs and line breaks. "entering" a tag means we're at the '<'
        // and exiting means we're at the '>', not that it's a start or close tag.
        byte entering = TEXT;
        byte exiting = TEXT;
        Stack openTags = new Stack();
        // are we currently within a < and a > that constitute some kind of tag?
@ -424,15 +450,14 @@ public final class HtmlEncoder {
        char htmlQuoteChar = '\u0000';
        char macroQuoteChar = '\u0000';
        // number of newlines to ignore in \n -> <br> conversion
        int swallowLinebreaks = 0;
        // number of newlines met since the last non-whitespace character
        int linebreaks = 0;
        // did we meet a backslash escape?
        boolean escape = false;
        boolean triggerBreak = false;
        for (int i = 0; i < l; i++) {
            char c = str.charAt(i);
@ -475,20 +500,19 @@ public final class HtmlEncoder {
                                htmlQuoteChar = '\u0000';
                                htmlTagMode = TAG_NAME;
-                                // set ignoreNewline on some tags, depending on wheather they're
+                                exiting = entering;
-                                // being opened or closed.
+                                entering = TEXT;
                                // what's going on here? we switch newline encoding on inside some tags, for
                                // others we switch it on when they're closed
                                linebreaks = Math.max(linebreaks - swallowLinebreaks, 0);
-                                if (swallowAll.contains(tagName)) {
+                                if (internalTags.contains(tagName)) {
-                                    swallowLinebreaks = 1000;
+                                    entering = INTERNAL;
-                                } else if (swallowTwo.contains(tagName)) {
+                                } else if (blockTags.contains(tagName)) {
-                                    swallowLinebreaks = 2;
+                                    entering = BLOCK;
-                                } else if (swallowOne.contains(tagName)) {
+                                } else if (semiBlockTags.contains(tagName)) {
-                                    swallowLinebreaks = 1;
+                                    entering = paragraphs ? BLOCK : SEMIBLOCK;
-                                } else {
+                                }
-                                    swallowLinebreaks = 0;
+
                                if (entering > 0) {
                                    triggerBreak = !insidePreTag;
                                }
                                if (insideCloseTag) {
@ -513,7 +537,6 @@ public final class HtmlEncoder {
                                    openTags.pop();
                                } else {
                                    openTags.push(tagName);
                                    swallowLinebreaks = Math.max(swallowLinebreaks - 1, 0);
                                }
                                if ("code".equals(tagName) && !insideCloseTag) {
@ -526,25 +549,44 @@ public final class HtmlEncoder {
                            }
                        }
                    }
-                }
+                } // if (i < l-2)
                 // if (i < l-2)
            }
-            if ((linebreaks > 0 || swallowLinebreaks > 0) && !Character.isWhitespace(c)) {
+            if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) {
-                if (!insidePreTag) {
+
-                    for (int k = 0; k < linebreaks; k++) {
+                if (!insideTag) {
-                        if (k >= swallowLinebreaks) {
+                    exiting = entering;
                    entering = TEXT;
                    if (exiting >= SEMIBLOCK) {
                        paragraphStart = ret.length();
                    }
                }
                if (entering != INTERNAL && exiting != INTERNAL) {
                    int swallowBreaks = 0;
                    if (paragraphs && (entering != BLOCK || exiting != BLOCK) &&
                          (exiting < BLOCK) &&
                          (entering >= SEMIBLOCK || linebreaks > 1) &&
                          paragraphStart < ret.length()) {
                        ret.insert(paragraphStart, "<p>");
                        ret.append("</p>");
                        swallowBreaks = 2;
                    }
                    for (int k = linebreaks-1; k>=0; k--) {
                        if (k >= swallowBreaks && k >= entering && k >= exiting) {
                            ret.append("<br />");
                        }
                        ret.append(newLine);
                    }
-                }
+                    if (exiting >= SEMIBLOCK || linebreaks > 1) {
                        paragraphStart = ret.length();
                    }
                if (!insideTag) {
                    swallowLinebreaks = 0;
                }
                linebreaks = 0;
                triggerBreak = false;
            }
            switch (c) {
@ -632,17 +674,18 @@ public final class HtmlEncoder {
                    break;
                case '\n':
-                    if (!insideTag) {
+                    if (insideTag || insidePreTag) {
                        linebreaks++;
                    } else {
                        ret.append('\n');
                    } else {
                        linebreaks++;
                    }
                    break;
                case '\r':
-                    if (!insideTag) {
+                    if (insideTag || insidePreTag) {
-                        break;
+                        ret.append('\r');
                    }
                    break;
                case '>':
@ -673,12 +716,19 @@ public final class HtmlEncoder {
                                openTags.pop();
                            }
                        }
                        exiting = entering;
                        if (exiting > 0) {
                           triggerBreak = !insidePreTag;
                        }
                    } else {
                        ret.append("&gt;");
                    }
                    // check if we still are inside any kind of tag
                    insideTag = insideComment || insideMacroTag || insideHtmlTag;
                    insideCloseTag = insideTag;
                    break;
@ -739,9 +789,16 @@ public final class HtmlEncoder {
        }
        // add remaining newlines we may have collected
        int swallowBreaks = 0;
        if (paragraphs && exiting < BLOCK) {
            ret.insert(paragraphStart, "<p>");
            ret.append("</p>");
            swallowBreaks = 2;
        }
        if (linebreaks > 0) {
-            for (int i = 0; i < linebreaks; i++) {
+            for (int i = linebreaks-1; i>=0; i--) {
-                if (i >= swallowLinebreaks) {
+                if (i >= swallowBreaks && i > exiting) {
                    ret.append("<br />");
                }
                ret.append(newLine);