chg: replaced ant with gradle
This commit is contained in:
parent
ced560f0c7
commit
7eebeee1d0
615 changed files with 87626 additions and 638 deletions
156
modules/jala/code/HtmlDocument.js
Normal file
156
modules/jala/code/HtmlDocument.js
Normal file
|
@ -0,0 +1,156 @@
|
|||
//
|
||||
// Jala Project [http://opensvn.csie.org/traccgi/jala]
|
||||
//
|
||||
// Copyright 2004 ORF Online und Teletext GmbH
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the ``License'');
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an ``AS IS'' BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// $Revision$
|
||||
// $LastChangedBy$
|
||||
// $LastChangedDate$
|
||||
// $HeadURL$
|
||||
//
|
||||
|
||||
/**
|
||||
* @fileoverview Fields and methods of the jala.HtmlDocument class.
|
||||
*/
|
||||
|
||||
// Make sure the shared Jala namespace object exists before this module
// attaches anything to it. An existing (truthy) namespace is left untouched;
// otherwise a fresh empty object is installed.
global.jala || (global.jala = {});
|
||||
|
||||
/**
 * Jala dependencies.
 * Registers the jar files this module relies on as application
 * repositories. The base directory is looked up via the "jala.dir"
 * property, with "modules/jala" passed as the second argument
 * (presumably the fallback value -- confirm against getProperty).
 * Wrapped in an anonymous function so the locals do not leak into
 * the global scope.
 */
(function() {
   var baseDir = getProperty("jala.dir", "modules/jala");
   var jarPaths = [
      "/lib/dom4j-1.6.1.jar",
      "/lib/jaxen-1.1-beta-8.jar"
   ];
   for (var idx = 0; idx < jarPaths.length; idx += 1) {
      app.addRepository(baseDir + jarPaths[idx]);
   }
})();
|
||||
|
||||
/**
 * Construct a new HTML document.
 * @class This class wraps an HTML source string in a dom4j document
 * tree. The source is read through a dom4j SAXReader configured with
 * the TagSoup parser class, normalized, and then made available for
 * XPath queries ({@link #scrape}), link extraction ({@link #getLinks})
 * and generic element extraction ({@link #getAll}).
 * If reading or normalizing fails, a string is thrown that consists of
 * the original error followed by a hint listing the required jar files.
 * @param {String} source The HTML source code.
 * @returns A new HTML document.
 * @constructor
 */
jala.HtmlDocument = function(source) {
   // Jar base names mapped to their project home pages. Only used to
   // build the hint that is appended to the error thrown below.
   var REQUIREMENTS = {
      "dom4j-1.6.1": "http://www.dom4j.org",
      "jaxen-1.1-beta-8": "http://www.jaxen.org"
   };

   var input = new java.io.StringReader(source);
   var dom4j = Packages.org.dom4j;
   var parserClassName = "org.ccil.cowan.tagsoup.Parser";
   var document;

   try {
      var parser = new dom4j.io.SAXReader(parserClassName);
      document = parser.read(input);
      document.normalize();
   } catch (e) {
      // Write the hint between res.push() and res.pop() so that it is
      // captured as a string (the value returned by res.pop()) and can
      // be appended to the error.
      res.push();
      res.write("\njala.HtmlDocument requires the following Java ");
      res.write("packages in ext/lib or application directory:\n");
      for (var jarName in REQUIREMENTS) {
         res.write(jarName + ".jar" + " [" + REQUIREMENTS[jarName] + "]\n");
      }
      var hint = res.pop();
      throw (e + hint);
   }

   /**
    * Converts a single element into a plain JavaScript object with
    * the properties name, value and (only if the element carries
    * attributes) attributes. Empty text or attribute data becomes null.
    * @param element An element taken from a scrape() result list
    * @returns The object representation of the element
    * @type Object
    * @private
    */
   var describeElement = function(element) {
      var description = {
         name: element.getName(),
         value: element.getText() || null
      };
      var attrList = element.attributes();
      var total = attrList.size();
      if (total > 0) {
         description.attributes = [];
         for (var n = 0; n < total; n += 1) {
            var attr = attrList.get(n);
            description.attributes.push({
               name: attr.getName(),
               value: attr.getData() || null
            });
         }
      }
      return description;
   };

   /**
    * Get all document nodes from an XPath expression.
    * @param {String} xpathExpr An XPath expression.
    * @returns The list of nodes selected by the expression, as
    * returned by the document's selectNodes() method. The other
    * methods of this class read it via size() and get(index).
    * @type java.util.List
    */
   this.scrape = function(xpathExpr) {
      var nodes = document.selectNodes(xpathExpr);
      return nodes;
   };

   /**
    * Get all link elements of the HTML document.
    * Only anchors that have both a non-empty text and an href
    * attribute are included; all others are skipped.
    * @returns A list of objects, each with the properties text
    * (the anchor text) and url (the text of the href attribute).
    * @type Array
    */
   this.getLinks = function() {
      var links = [];
      var anchors = this.scrape("//html:a");
      for (var idx = 0; idx < anchors.size(); idx += 1) {
         var anchor = anchors.get(idx);
         var label = anchor.getText();
         var href = anchor.attribute("href");
         if (!label || !href) {
            continue;
         }
         links.push({
            text: label,
            url: href.getText()
         });
      }
      return links;
   };

   /**
    * Retrieves all elements by name from the document.
    * Each element is returned as an object with the properties name
    * and value, plus an attributes array of name/value objects if the
    * element has any attributes. According to the original
    * documentation this structure is compatible for usage
    * in {@link jala.XmlWriter}.
    * @param {String} elementName The name of the desired element
    * @returns The list of available elements in the document
    * @type Array
    */
   this.getAll = function(elementName) {
      var collected = [];
      var matches = this.scrape("//html:" + elementName);
      for (var idx = 0; idx < matches.size(); idx += 1) {
         collected.push(describeElement(matches.get(idx)));
      }
      return collected;
   };

   /**
    * Get a string representation of the HTML document.
    * The number shown is the length of the source string.
    * @returns A string representation of the HTML document.
    * @type String
    */
   this.toString = function() {
      return "[jala.HtmlDocument " + source.length + " Bytes]";
   };

   return this;
};
|
Loading…
Add table
Add a link
Reference in a new issue