// // Jala Project [http://opensvn.csie.org/traccgi/jala] // // Copyright 2004 ORF Online und Teletext GmbH // // Licensed under the Apache License, Version 2.0 (the ``License''); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an ``AS IS'' BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // $Revision$ // $LastChangedBy$ // $LastChangedDate$ // $HeadURL$ // /** * @fileoverview Fields and methods of the jala.HtmlDocument class. */ // Define the global namespace for Jala modules if (!global.jala) { global.jala = {}; } /** * Jala dependencies */ (function() { var jalaDir = getProperty("jala.dir", "modules/jala"); app.addRepository(jalaDir + "/lib/dom4j-1.6.1.jar"); app.addRepository(jalaDir + "/lib/jaxen-1.1-beta-8.jar"); })(); /** * Construct a new HTML document. * @class This class provides easy access to the elements of * an arbitrary HTML document. By using TagSoup, Dom4J and Jaxen * even invalid HTML can be parsed, turned into an object tree * and easily be processed with XPath expressions. * @param {String} source The HTML source code. * @returns A new HTML document. * @constructor */ jala.HtmlDocument = function(source) { var REQUIREMENTS = { "dom4j-1.6.1": "http://www.dom4j.org", "jaxen-1.1-beta-8": "http://www.jaxen.org" }; var reader = new java.io.StringReader(source); var dom4j = Packages.org.dom4j; var tagsoup = "org.ccil.cowan.tagsoup.Parser"; try { var saxReader = new dom4j.io.SAXReader(tagsoup); var document = saxReader.read(reader); document.normalize(); } catch(e) { res.push(); res.write("\njala.HtmlDocument requires the following Java "); res.write("packages in ext/lib or application directory:\n"); for (var i in REQUIREMENTS) { res.write(i); res.write(".jar"); res.write(" ["); res.write(REQUIREMENTS[i]); res.write("]\n"); } throw (e + res.pop()); } /** * Get all document nodes from an XPath expression. * @param {String} xpathExpr An XPath expression. * @returns A list of HTML elements. * @type org.dom4j.tree.DefaultElement */ this.scrape = function(xpathExpr) { return document.selectNodes(xpathExpr); }; /** * Get all link elements of the HTML document. * @returns A list of link elements. * @type Array */ this.getLinks = function() { var result = []; var list = this.scrape("//html:a"); for (var i=0; i 0) { object.attributes = new Array; for (n=0; n