Compare commits
27 commits
6ee6b23548
...
77c99be343
Author | SHA1 | Date | |
---|---|---|---|
77c99be343 | |||
87640ed8da | |||
ad4e35f452 | |||
ae79a59639 | |||
354c9788c8 | |||
67573db0e0 | |||
22013f2267 | |||
8370b2ba98 | |||
144d2475cb | |||
96ae3ad7fb | |||
a7fc1df893 | |||
5b929bf0ca | |||
0b326e71e6 | |||
c6d86368c5 | |||
1d59aff5a6 | |||
50b5c4d09e | |||
42a0d7e610 | |||
c5b9a613a8 | |||
362ca05ab8 | |||
a7cabf0d63 | |||
f25b3c0b76 | |||
543070a94f | |||
786a110b48 | |||
348fa4607d | |||
f90d1e0667 | |||
e5216c6c5f | |||
6ed24ef0d2 |
25 changed files with 2087 additions and 469 deletions
47
.github/workflows/deploy.yml
vendored
47
.github/workflows/deploy.yml
vendored
|
@ -1,15 +1,48 @@
|
|||
name: Deploy (Production)
|
||||
name: Deploy
|
||||
|
||||
on: workflow_dispatch
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
hostname:
|
||||
description: Hostname
|
||||
type: string
|
||||
required: true
|
||||
default: antville.org
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
stage:
|
||||
runs-on: antville
|
||||
|
||||
environment:
|
||||
name: antville.org
|
||||
url: https://antville.org
|
||||
name: production
|
||||
url: ${{ inputs.hostname }}
|
||||
|
||||
steps:
|
||||
- name: Copy files to production server
|
||||
run: ssh staging-server deploy-antville
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Build with Gradle
|
||||
run: ./gradlew :build
|
||||
|
||||
- name: Copy files to server
|
||||
run: |
|
||||
rsync ./build/install/antville/ ${{ inputs.hostname }}:./apps/antville/ \
|
||||
--archive --compress --delete --verbose \
|
||||
--filter '+ /claustra' \
|
||||
--filter '+ /code' \
|
||||
--filter '+ /compat' \
|
||||
--filter '+ /db' \
|
||||
--filter '+ /i18n' \
|
||||
--filter '+ /lib' \
|
||||
--filter '- /*'
|
||||
rsync ./build/install/antville/static/ ${{ inputs.hostname }}:./apps/antville/static/ \
|
||||
--archive --compress --verbose \
|
||||
--filter '+ /fonts' \
|
||||
--filter '+ /formica.html' \
|
||||
--filter '+ /img' \
|
||||
--filter '+ /scripts' \
|
||||
--filter '+ /styles' \
|
||||
--filter '- /*'
|
||||
|
||||
- name: Restart Helma
|
||||
run: ssh ${{ inputs.hostname }} restart
|
||||
|
||||
|
|
42
.github/workflows/stage.yml
vendored
42
.github/workflows/stage.yml
vendored
|
@ -1,42 +0,0 @@
|
|||
name: Deploy (Staging)
|
||||
|
||||
on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
stage:
|
||||
runs-on: antville
|
||||
|
||||
environment:
|
||||
name: stage
|
||||
url: ${{ vars.stage_url }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Build with Gradle
|
||||
run: ./gradlew :build
|
||||
|
||||
- name: Publish to staging server
|
||||
# The rsync command applies the same filters as the one in tools/extras/deploy.sh
|
||||
run: |
|
||||
rsync ./build/install/antville/ staging-server:./apps/antville/ \
|
||||
--archive --compress --delete --verbose \
|
||||
--filter '+ /claustra' \
|
||||
--filter '+ /code' \
|
||||
--filter '+ /compat' \
|
||||
--filter '+ /db' \
|
||||
--filter '+ /i18n' \
|
||||
--filter '+ /lib' \
|
||||
--filter '- /*'
|
||||
rsync ./build/install/antville/static/ staging-server:./apps/antville/static/ \
|
||||
--archive --compress --verbose \
|
||||
--filter '+ /fonts' \
|
||||
--filter '+ /formica.html' \
|
||||
--filter '+ /img' \
|
||||
--filter '+ /scripts' \
|
||||
--filter '+ /styles' \
|
||||
--filter '- /*'
|
||||
|
||||
- name: Restart Helma
|
||||
run: ssh staging-server restart
|
||||
|
2
.nvmrc
2
.nvmrc
|
@ -1 +1 @@
|
|||
lts
|
||||
lts/*
|
||||
|
|
|
@ -51,7 +51,7 @@ dependencies {
|
|||
implementation 'org.commonmark:commonmark-ext-autolink:0.24.0'
|
||||
implementation 'org.commonmark:commonmark-ext-gfm-strikethrough:0.24.0'
|
||||
implementation 'org.commonmark:commonmark-ext-gfm-tables:0.24.0'
|
||||
implementation 'org.jsoup:jsoup:1.20.1'
|
||||
implementation 'org.jsoup:jsoup:1.21.1'
|
||||
implementation 'rome:rome:1.0'
|
||||
|
||||
lessCss('org.lesscss:lesscss:1.7.0.1.1') {
|
||||
|
|
|
@ -46,7 +46,7 @@ Proxy.prototype.main_action = function () {
|
|||
|
||||
let content = new java.lang.String(data.content, 'utf-8');
|
||||
|
||||
if (!data.type.startsWith('text/')) {
|
||||
if (data.type && !data.type.startsWith('text/')) {
|
||||
content = new java.lang.String(content.enbase64());
|
||||
}
|
||||
|
||||
|
@ -55,10 +55,10 @@ Proxy.prototype.main_action = function () {
|
|||
} else {
|
||||
res.contentType = data.type;
|
||||
|
||||
if (data.type.startsWith('text/')) {
|
||||
res.write(java.lang.String(data.content, 'utf-8'));
|
||||
} else {
|
||||
if (data.type && !data.type.startsWith('text/')) {
|
||||
res.writeBinary(data.content);
|
||||
} else {
|
||||
res.write(java.lang.String(data.content, 'utf-8'));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
543
code/Global/Robots.js
Normal file
543
code/Global/Robots.js
Normal file
|
@ -0,0 +1,543 @@
|
|||
// Robots parser adapted for Rhino-compatible JavaScript
|
||||
// Source: <https://github.com/samclarke/robots-parser>
|
||||
// Copyright (c) 2014 Sam Clarke
|
||||
// Copyright (c) 2025 Antville.org
|
||||
// MIT License (MIT)
|
||||
|
||||
// Transformation steps:
|
||||
// 1. Add IIFE around the code
|
||||
// 2. Replace module.exports with return statement
|
||||
// 3. Add conditional module.exports for CommonJS support
|
||||
// 4. Add URL class imitation
|
||||
|
||||
var Robots = (() => {
|
||||
/**
|
||||
* Half-baked (read-only) imitation of the URL class of Node.js
|
||||
*/
|
||||
function nodeJsUrl(str, base) {
|
||||
if (!str.includes('://')) {
|
||||
str = (base || 'http://localhost') + str;
|
||||
}
|
||||
|
||||
const url = new java.net.URL(str);
|
||||
const port = url.port < 0 ? '' : url.port;
|
||||
const userInfo = (url.getUserInfo() || "").split(':');
|
||||
|
||||
return {
|
||||
hash: url.ref ? '#' + url.ref : '',
|
||||
href: url.toString(),
|
||||
host: url.host + (port ? ':' + port : port),
|
||||
hostname: url.host,
|
||||
password: userInfo[1] || "",
|
||||
pathname: url.path,
|
||||
origin: url.protocol + '://' + url.host + (port ? ':' + port : port),
|
||||
port,
|
||||
protocol: url.protocol,
|
||||
search: url.queryy ? '?' + url.query : '',
|
||||
searchParams: {
|
||||
get: () => null,
|
||||
set: () => null
|
||||
},
|
||||
username: userInfo[0] || "",
|
||||
};
|
||||
}
|
||||
|
||||
if (typeof URL === 'undefined') {
|
||||
globalThis.URL = nodeJsUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trims the white space from the start and end of the line.
|
||||
*
|
||||
* If the line is an array it will strip the white space from
|
||||
* the start and end of each element of the array.
|
||||
*
|
||||
* @param {string|Array} line
|
||||
* @return {string|Array}
|
||||
* @private
|
||||
*/
|
||||
function trimLine(line) {
|
||||
if (!line) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (Array.isArray(line)) {
|
||||
return line.map(trimLine);
|
||||
}
|
||||
|
||||
return String(line).trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove comments from lines
|
||||
*
|
||||
* @param {string} line
|
||||
* @return {string}
|
||||
* @private
|
||||
*/
|
||||
function removeComments(line) {
|
||||
var commentStartIndex = line.indexOf('#');
|
||||
if (commentStartIndex > -1) {
|
||||
return line.substr(0, commentStartIndex);
|
||||
}
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a line at the first occurrence of :
|
||||
*
|
||||
* @param {string} line
|
||||
* @return {Array.<string>}
|
||||
* @private
|
||||
*/
|
||||
function splitLine(line) {
|
||||
var idx = String(line).indexOf(':');
|
||||
|
||||
if (!line || idx < 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return [line.slice(0, idx), line.slice(idx + 1)];
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalises the user-agent string by converting it to
|
||||
* lower case and removing any version numbers.
|
||||
*
|
||||
* @param {string} userAgent
|
||||
* @return {string}
|
||||
* @private
|
||||
*/
|
||||
function formatUserAgent(userAgent) {
|
||||
var formattedUserAgent = userAgent.toLowerCase();
|
||||
|
||||
// Strip the version number from robot/1.0 user agents
|
||||
var idx = formattedUserAgent.indexOf('/');
|
||||
if (idx > -1) {
|
||||
formattedUserAgent = formattedUserAgent.substr(0, idx);
|
||||
}
|
||||
|
||||
return formattedUserAgent.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalises the URL encoding of a path by encoding
|
||||
* unicode characters.
|
||||
*
|
||||
* @param {string} path
|
||||
* @return {string}
|
||||
* @private
|
||||
*/
|
||||
function normaliseEncoding(path) {
|
||||
try {
|
||||
return urlEncodeToUpper(encodeURI(path).replace(/%25/g, '%'));
|
||||
} catch (e) {
|
||||
return path;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert URL encodings to support case.
|
||||
*
|
||||
* e.g.: %2a%ef becomes %2A%EF
|
||||
*
|
||||
* @param {string} path
|
||||
* @return {string}
|
||||
* @private
|
||||
*/
|
||||
function urlEncodeToUpper(path) {
|
||||
return path.replace(/%[0-9a-fA-F]{2}/g, function (match) {
|
||||
return match.toUpperCase();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Matches a pattern with the specified path
|
||||
*
|
||||
* Uses same algorithm to match patterns as the Google implementation in
|
||||
* google/robotstxt so it should be consistent with the spec.
|
||||
*
|
||||
* @see https://github.com/google/robotstxt/blob/f465f0ede81099dd8bc4aeb2966b3a892bd488b3/robots.cc#L74
|
||||
* @param {string} pattern
|
||||
* @param {string} path
|
||||
* @return {boolean}
|
||||
* @private
|
||||
*/
|
||||
function matches(pattern, path) {
|
||||
// I've added extra comments to try make this easier to understand
|
||||
|
||||
// Stores the lengths of all the current matching substrings.
|
||||
// Maximum number of possible matching lengths is every length in path plus
|
||||
// 1 to handle 0 length too (if pattern starts with * which is zero or more)
|
||||
var matchingLengths = new Array(path.length + 1);
|
||||
var numMatchingLengths = 1;
|
||||
|
||||
// Initially longest match is 0
|
||||
matchingLengths[0] = 0;
|
||||
|
||||
for (var p = 0; p < pattern.length; p++) {
|
||||
// If $ is at the end of pattern then we must match the whole path.
|
||||
// Which is true if the longest matching length matches path length
|
||||
if (pattern[p] === '$' && p + 1 === pattern.length) {
|
||||
return matchingLengths[numMatchingLengths - 1] === path.length;
|
||||
}
|
||||
|
||||
// Handle wildcards
|
||||
if (pattern[p] == '*') {
|
||||
// Wildcard so all substrings minus the current smallest matching
|
||||
// length are matches
|
||||
numMatchingLengths = path.length - matchingLengths[0] + 1;
|
||||
|
||||
// Update matching lengths to include the smallest all the way up
|
||||
// to numMatchingLengths
|
||||
// Don't update smallest possible match as * matches zero or more
|
||||
// so the smallest current match is also valid
|
||||
for (var i = 1; i < numMatchingLengths; i++) {
|
||||
matchingLengths[i] = matchingLengths[i - 1] + 1;
|
||||
}
|
||||
} else {
|
||||
// Check the char at the matching length matches the pattern, if it
|
||||
// does increment it and add it as a valid length, ignore if not.
|
||||
var numMatches = 0;
|
||||
for (var i = 0; i < numMatchingLengths; i++) {
|
||||
if (
|
||||
matchingLengths[i] < path.length &&
|
||||
path[matchingLengths[i]] === pattern[p]
|
||||
) {
|
||||
matchingLengths[numMatches++] = matchingLengths[i] + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// No paths matched the current pattern char so not a match
|
||||
if (numMatches == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
numMatchingLengths = numMatches;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function parseRobots(contents, robots) {
|
||||
var newlineRegex = /\r\n|\r|\n/;
|
||||
var lines = contents
|
||||
.split(newlineRegex)
|
||||
.map(removeComments)
|
||||
.map(splitLine)
|
||||
.map(trimLine);
|
||||
|
||||
var currentUserAgents = [];
|
||||
var isNoneUserAgentState = true;
|
||||
for (var i = 0; i < lines.length; i++) {
|
||||
var line = lines[i];
|
||||
|
||||
if (!line || !line[0]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (line[0].toLowerCase()) {
|
||||
case 'user-agent':
|
||||
if (isNoneUserAgentState) {
|
||||
currentUserAgents.length = 0;
|
||||
}
|
||||
|
||||
if (line[1]) {
|
||||
currentUserAgents.push(formatUserAgent(line[1]));
|
||||
}
|
||||
break;
|
||||
case 'disallow':
|
||||
robots.addRule(currentUserAgents, line[1], false, i + 1);
|
||||
break;
|
||||
case 'allow':
|
||||
robots.addRule(currentUserAgents, line[1], true, i + 1);
|
||||
break;
|
||||
case 'crawl-delay':
|
||||
robots.setCrawlDelay(currentUserAgents, line[1]);
|
||||
break;
|
||||
case 'sitemap':
|
||||
if (line[1]) {
|
||||
robots.addSitemap(line[1]);
|
||||
}
|
||||
break;
|
||||
case 'host':
|
||||
if (line[1]) {
|
||||
robots.setPreferredHost(line[1].toLowerCase());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
isNoneUserAgentState = line[0].toLowerCase() !== 'user-agent';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns if a pattern is allowed by the specified rules.
|
||||
*
|
||||
* @param {string} path
|
||||
* @param {Array.<Object>} rules
|
||||
* @return {Object?}
|
||||
* @private
|
||||
*/
|
||||
function findRule(path, rules) {
|
||||
var matchedRule = null;
|
||||
|
||||
for (var i = 0; i < rules.length; i++) {
|
||||
var rule = rules[i];
|
||||
|
||||
if (!matches(rule.pattern, path)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// The longest matching rule takes precedence
|
||||
// If rules are the same length then allow takes precedence
|
||||
if (!matchedRule || rule.pattern.length > matchedRule.pattern.length) {
|
||||
matchedRule = rule;
|
||||
} else if (
|
||||
rule.pattern.length == matchedRule.pattern.length &&
|
||||
rule.allow &&
|
||||
!matchedRule.allow
|
||||
) {
|
||||
matchedRule = rule;
|
||||
}
|
||||
}
|
||||
|
||||
return matchedRule;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts provided string into an URL object.
|
||||
*
|
||||
* Will return null if provided string is not a valid URL.
|
||||
*
|
||||
* @param {string} url
|
||||
* @return {?URL}
|
||||
* @private
|
||||
*/
|
||||
function parseUrl(url) {
|
||||
try {
|
||||
// Specify a URL to be used with relative paths
|
||||
// Using non-existent subdomain so can never cause conflict unless
|
||||
// trying to crawl it but doesn't exist and even if tried worst that can
|
||||
// happen is it allows relative URLs on it.
|
||||
var url = new URL(url, 'http://robots-relative.samclarke.com/');
|
||||
|
||||
if (!url.port) {
|
||||
url.port = url.protocol === 'https:' ? 443 : 80;
|
||||
}
|
||||
|
||||
return url;
|
||||
} catch (e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function Robots(url, contents) {
|
||||
this._url = parseUrl(url) || {};
|
||||
this._rules = Object.create(null);
|
||||
this._sitemaps = [];
|
||||
this._preferredHost = null;
|
||||
|
||||
parseRobots(contents || '', this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the specified allow/deny rule to the rules
|
||||
* for the specified user-agents.
|
||||
*
|
||||
* @param {Array.<string>} userAgents
|
||||
* @param {string} pattern
|
||||
* @param {boolean} allow
|
||||
* @param {number} [lineNumber] Should use 1-based indexing
|
||||
*/
|
||||
Robots.prototype.addRule = function (userAgents, pattern, allow, lineNumber) {
|
||||
var rules = this._rules;
|
||||
|
||||
userAgents.forEach(function (userAgent) {
|
||||
rules[userAgent] = rules[userAgent] || [];
|
||||
|
||||
if (!pattern) {
|
||||
return;
|
||||
}
|
||||
|
||||
rules[userAgent].push({
|
||||
pattern: normaliseEncoding(pattern),
|
||||
allow: allow,
|
||||
lineNumber: lineNumber
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Adds the specified delay to the specified user agents.
|
||||
*
|
||||
* @param {Array.<string>} userAgents
|
||||
* @param {string} delayStr
|
||||
*/
|
||||
Robots.prototype.setCrawlDelay = function (userAgents, delayStr) {
|
||||
var rules = this._rules;
|
||||
var delay = Number(delayStr);
|
||||
|
||||
userAgents.forEach(function (userAgent) {
|
||||
rules[userAgent] = rules[userAgent] || [];
|
||||
|
||||
if (isNaN(delay)) {
|
||||
return;
|
||||
}
|
||||
|
||||
rules[userAgent].crawlDelay = delay;
|
||||
});
|
||||
};
|
||||
|
||||
/**
|
||||
* Add a sitemap
|
||||
*
|
||||
* @param {string} url
|
||||
*/
|
||||
Robots.prototype.addSitemap = function (url) {
|
||||
this._sitemaps.push(url);
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets the preferred host name
|
||||
*
|
||||
* @param {string} url
|
||||
*/
|
||||
Robots.prototype.setPreferredHost = function (url) {
|
||||
this._preferredHost = url;
|
||||
};
|
||||
|
||||
Robots.prototype._getRule = function (url, ua, explicit) {
|
||||
var parsedUrl = parseUrl(url) || {};
|
||||
var userAgent = formatUserAgent(ua || '*');
|
||||
|
||||
// The base URL must match otherwise this robots.txt is not valid for it.
|
||||
if (
|
||||
parsedUrl.protocol !== this._url.protocol ||
|
||||
parsedUrl.hostname !== this._url.hostname ||
|
||||
parsedUrl.port !== this._url.port
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
var rules = this._rules[userAgent];
|
||||
if (!explicit) {
|
||||
rules = rules || this._rules['*'];
|
||||
}
|
||||
rules = rules || [];
|
||||
var path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search);
|
||||
var rule = findRule(path, rules);
|
||||
|
||||
return rule;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns true if allowed, false if not allowed.
|
||||
*
|
||||
* Will return undefined if the URL is not valid for
|
||||
* this robots.txt file.
|
||||
*
|
||||
* @param {string} url
|
||||
* @param {string?} ua
|
||||
* @return {boolean?}
|
||||
*/
|
||||
Robots.prototype.isAllowed = function (url, ua) {
|
||||
var rule = this._getRule(url, ua, false);
|
||||
|
||||
if (typeof rule === 'undefined') {
|
||||
return;
|
||||
}
|
||||
|
||||
return !rule || rule.allow;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the line number of the matching directive for the specified
|
||||
* URL and user-agent if any.
|
||||
*
|
||||
* The line numbers start at 1 and go up (1-based indexing).
|
||||
*
|
||||
* Return -1 if there is no matching directive. If a rule is manually
|
||||
* added without a lineNumber then this will return undefined for that
|
||||
* rule.
|
||||
*
|
||||
* @param {string} url
|
||||
* @param {string?} ua
|
||||
* @return {number?}
|
||||
*/
|
||||
Robots.prototype.getMatchingLineNumber = function (url, ua) {
|
||||
var rule = this._getRule(url, ua, false);
|
||||
|
||||
return rule ? rule.lineNumber : -1;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the opposite of isAllowed()
|
||||
*
|
||||
* @param {string} url
|
||||
* @param {string?} ua
|
||||
* @return {boolean}
|
||||
*/
|
||||
Robots.prototype.isDisallowed = function (url, ua) {
|
||||
return !this.isAllowed(url, ua);
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns trues if explicitly disallowed
|
||||
* for the specified user agent (User Agent wildcards are discarded).
|
||||
*
|
||||
* This will return undefined if the URL is not valid for this robots.txt file.
|
||||
*
|
||||
* @param {string} url
|
||||
* @param {string} ua
|
||||
* @return {boolean?}
|
||||
*/
|
||||
Robots.prototype.isExplicitlyDisallowed = function (url, ua) {
|
||||
var rule = this._getRule(url, ua, true);
|
||||
if (typeof rule === 'undefined') {
|
||||
return;
|
||||
}
|
||||
|
||||
return !(!rule || rule.allow);
|
||||
};
|
||||
|
||||
/**
|
||||
* Gets the crawl delay if there is one.
|
||||
*
|
||||
* Will return undefined if there is no crawl delay set.
|
||||
*
|
||||
* @param {string} ua
|
||||
* @return {number?}
|
||||
*/
|
||||
Robots.prototype.getCrawlDelay = function (ua) {
|
||||
var userAgent = formatUserAgent(ua || '*');
|
||||
|
||||
return (this._rules[userAgent] || this._rules['*'] || {}).crawlDelay;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the preferred host if there is one.
|
||||
*
|
||||
* @return {string?}
|
||||
*/
|
||||
Robots.prototype.getPreferredHost = function () {
|
||||
return this._preferredHost;
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns an array of sitemap URLs if there are any.
|
||||
*
|
||||
* @return {Array.<string>}
|
||||
*/
|
||||
Robots.prototype.getSitemaps = function () {
|
||||
return this._sitemaps.slice(0);
|
||||
};
|
||||
|
||||
return Robots;
|
||||
})();
|
||||
|
||||
if (typeof module !== 'undefined' && module.exports) {
|
||||
module.exports = Robots;
|
||||
}
|
|
@ -140,10 +140,18 @@ HopObject.prototype.onRequest = function() {
|
|||
}
|
||||
}
|
||||
|
||||
// Set up layout handler and skin path
|
||||
HopObject.confirmConstructor(Layout);
|
||||
res.handlers.layout = res.handlers.site.layout || new Layout;
|
||||
res.skinpath = res.handlers.layout.getSkinPath();
|
||||
|
||||
if (res.handlers.site.enforceRobotsTxt()) {
|
||||
res.status = 403
|
||||
res.data.error = gettext('The <a href="{0}">robots.txt</a> file disallows access to this page.', res.handlers.site.href('robots.txt'));
|
||||
root.error_action();
|
||||
res.stop();
|
||||
}
|
||||
|
||||
if (!this.getPermission(req.action)) {
|
||||
if (!session.user) {
|
||||
User.setLocation(root.href() + req.path);
|
||||
|
|
|
@ -202,7 +202,23 @@ Members.prototype.login_action = function() {
|
|||
}
|
||||
|
||||
res.message = gettext('Welcome to {0}, {1}. Have fun!', res.handlers.site.getTitle(), user.name);
|
||||
res.redirect(User.getLocation() || this._parent.href());
|
||||
|
||||
const location = User.getLocation() || this._parent.href();
|
||||
|
||||
// If the requested host is outside of the cookie domain, redirect and login to the root site, too
|
||||
if (this._parent !== root && !req.getHeader("Host").includes(app.appsProperties.cookieDomain)) {
|
||||
const token = java.util.UUID.randomUUID();
|
||||
const digest = session.user.getDigest(token);
|
||||
session.user.setMetadata('rootCookieToken', token);
|
||||
res.redirect(
|
||||
root.href('cookie')
|
||||
+ '?digest=' + encodeURIComponent(digest)
|
||||
+ '&name=' + encodeURIComponent(req.postParams.name)
|
||||
+ '&location=' + encodeURIComponent(location)
|
||||
);
|
||||
}
|
||||
|
||||
res.redirect(location);
|
||||
} catch (ex) {
|
||||
res.message = ex;
|
||||
}
|
||||
|
|
|
@ -136,16 +136,16 @@
|
|||
(<% param.helmaBuildDate %>)
|
||||
</dd>
|
||||
<dt><% gettext "Scripting Engine" %></dt>
|
||||
<dd><a href="https://github.com/mozilla/rhino"><% param.rhino %></a></dd>
|
||||
<dd><a href="https://github.com/mozilla/rhino/releases/tag/<% param.rhino | replace \\. _ | replace \\s '' %>_Release"><% param.rhino %></a></dd>
|
||||
<dt><% gettext "Webserver" %></dt>
|
||||
<dd><a href="https://github.com/jetty/jetty.project">Jetty <% param.jetty %></a></dd>
|
||||
<dd><a href="https://github.com/jetty/jetty.project/releases/tag/jetty-<% param.jetty %>">Jetty <% param.jetty %></a></dd>
|
||||
<dt><% gettext "Servlet Interface" %></dt>
|
||||
<dd>
|
||||
<a href="https://docs.oracle.com/javaee/7/api/index.html?javax/servlet/package-summary.html">
|
||||
Javax <% param.servlet %>
|
||||
<a href="https://jakarta.ee/specifications/servlet/<% param.servlet %>">
|
||||
Jakarta <% param.servlet %>
|
||||
</a>
|
||||
</dd>
|
||||
<dt><% gettext "Virtual Machine" %></dt>
|
||||
<dd><a href="https://openjdk.org">Java <% param.java %></a></dd>
|
||||
<dd><a href="https://openjdk.org/projects/jdk/<% param.java | replace \\.\\d+\\.\\d+$ '' %>">Java <% param.java %></a></dd>
|
||||
</dl>
|
||||
</div>
|
||||
|
|
|
@ -94,6 +94,7 @@ Root.prototype.getPermission = function(action) {
|
|||
switch (action) {
|
||||
case '.':
|
||||
case 'main':
|
||||
case 'cookie':
|
||||
case 'debug':
|
||||
case 'default.hook':
|
||||
case 'favicon.ico':
|
||||
|
@ -367,6 +368,23 @@ Root.prototype.mrtg_action = function() {
|
|||
return;
|
||||
}
|
||||
|
||||
// Login to the root site if Members#login_action() redirects here
|
||||
// This way custom domains are getting the default domain cookie, too
|
||||
Root.prototype.cookie_action = function() {
|
||||
if (req.data.digest && req.data.name) {
|
||||
const user = User.getByName(req.data.name);
|
||||
if (user) {
|
||||
const token = user.getMetadata("rootCookieToken");
|
||||
const digest = user.getDigest(token);
|
||||
if (digest === req.data.digest) {
|
||||
session.login(user);
|
||||
user.deleteMetadata("rootCookieToken");
|
||||
}
|
||||
}
|
||||
}
|
||||
res.redirect(req.data.location || req.data.http_referer || root.href());
|
||||
};
|
||||
|
||||
/**
|
||||
* Catch some undefined macro handlers, then delegate to the super prototype.
|
||||
* @param {String} name
|
||||
|
|
|
@ -143,6 +143,22 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div class='uk-form-row'>
|
||||
<label class='uk-form-label' for='trollFilter'>
|
||||
<% gettext 'Robot rules' %>
|
||||
</label>
|
||||
|
||||
<div class='uk-form-controls'>
|
||||
<label>
|
||||
<% site.checkbox robotsTxtMode %>
|
||||
<% gettext enforced %>
|
||||
</label>
|
||||
<p class="uk-form-help-block">
|
||||
<% gettext 'Edit the rules in the <a href="{0}Site/robots/edit">robots.txt</a> skin.' <% site.layout.skins.href %> %>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class='uk-form-row'>
|
||||
<label class='uk-form-label' for='trollFilter'>
|
||||
<% gettext 'Troll Filter' %>
|
||||
|
|
|
@ -39,6 +39,7 @@ this.handleMetadata('notificationMode');
|
|||
this.handleMetadata('notified');
|
||||
this.handleMetadata('pageSize');
|
||||
this.handleMetadata('pageMode');
|
||||
this.handleMetadata('robotsTxtMode');
|
||||
this.handleMetadata('spamfilter');
|
||||
this.handleMetadata('tagline');
|
||||
this.handleMetadata('timeZone');
|
||||
|
@ -46,7 +47,7 @@ this.handleMetadata('title');
|
|||
this.handleMetadata('trollFilter');
|
||||
|
||||
/**
|
||||
* Ffunction
|
||||
* @function
|
||||
* @returns {String[]}
|
||||
* @see defineConstants
|
||||
*/
|
||||
|
@ -94,6 +95,13 @@ Site.getNotificationModes = defineConstants(Site, markgettext('Nobody'),
|
|||
*/
|
||||
Site.getCallbackModes = defineConstants(Site, markgettext('disabled'),
|
||||
markgettext('enabled'));
|
||||
/**
|
||||
* @function
|
||||
* @returns {String[]}
|
||||
* @see defineConstants
|
||||
*/
|
||||
Site.getRobotsTxtModes = defineConstants(Site, markgettext('suggest'),
|
||||
markgettext('enforce'));
|
||||
|
||||
/**
|
||||
* @param {String} name A unique identifier also used in the URL of a site
|
||||
|
@ -132,6 +140,7 @@ Site.add = function(data, user) {
|
|||
configured: now,
|
||||
created: now,
|
||||
creator: user,
|
||||
robotsTxtMode: Site.SUGGEST,
|
||||
modified: now,
|
||||
modifier: user,
|
||||
status: user.status === User.PRIVILEGED ? Site.TRUSTED : user.status,
|
||||
|
@ -367,6 +376,8 @@ Site.prototype.getFormOptions = function(name) {
|
|||
switch (name) {
|
||||
case 'archiveMode':
|
||||
return Site.getArchiveModes();
|
||||
case 'callbackMode':
|
||||
return Site.getCallbackModes();
|
||||
case 'commentMode':
|
||||
return Site.getCommentModes();
|
||||
case 'locale':
|
||||
|
@ -379,12 +390,12 @@ Site.prototype.getFormOptions = function(name) {
|
|||
return Site.getNotificationModes();
|
||||
case 'pageMode':
|
||||
return Site.getPageModes();
|
||||
case 'robotsTxtMode':
|
||||
return Site.getRobotsTxtModes();
|
||||
case 'status':
|
||||
return Site.getStatus();
|
||||
case 'timeZone':
|
||||
return getTimeZones(this.getLocale());
|
||||
case 'callbackMode':
|
||||
return Site.getCallbackModes();
|
||||
default:
|
||||
return HopObject.prototype.getFormOptions.apply(this, arguments);
|
||||
}
|
||||
|
@ -441,8 +452,9 @@ Site.prototype.update = function(data) {
|
|||
archiveMode: data.archiveMode || Site.CLOSED,
|
||||
callbackMode: data.callbackMode || Site.DISABLED,
|
||||
callbackUrl: data.callbackUrl || this.callbackUrl || String.EMPTY,
|
||||
imageDimensionLimits: [data.maxImageWidth, data.maxImageHeight],
|
||||
commentMode: data.commentMode || Site.DISABLED,
|
||||
robotsTxtMode: data.robotsTxtMode || Site.RELAXED,
|
||||
imageDimensionLimits: [data.maxImageWidth, data.maxImageHeight],
|
||||
locale: data.locale || root.getLocale().toString(),
|
||||
mode: data.mode || Site.CLOSED,
|
||||
notificationMode: data.notificationMode || Site.NOBODY,
|
||||
|
@ -477,7 +489,8 @@ Site.prototype.main_css_action = function() {
|
|||
res.push();
|
||||
this.renderSkin('$Site#stylesheet');
|
||||
this.renderSkin('Site#stylesheet');
|
||||
var css = res.pop();
|
||||
var css = res.pop()
|
||||
.replace(/<(\/?style|!).*/g, ''); // TODO: Actually, a compatibility fix (earlier CSS skins contained the <style> element)
|
||||
|
||||
try {
|
||||
lessParser.parse(css, function(error, less) {
|
||||
|
@ -1124,3 +1137,28 @@ Site.prototype.callback = function(ref) {
|
|||
}
|
||||
return;
|
||||
}
|
||||
|
||||
Site.prototype.enforceRobotsTxt = function() {
|
||||
if (this.robotsTxtMode !== Site.ENFORCE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Override some URLs to prevent a site from becoming inaccessible even for the owner
|
||||
const overrides = [
|
||||
this.href('edit'),
|
||||
this.href('main.css'),
|
||||
this.href('main.js'),
|
||||
this.href('robots.txt'),
|
||||
this.layout.href(),
|
||||
this.members.href()
|
||||
];
|
||||
|
||||
const robotsTxt = root.renderSkinAsString('Site#robots');
|
||||
const robots = new Robots(this.href('robots.txt'), robotsTxt);
|
||||
|
||||
const href = path.href(req.action);
|
||||
const fullUrl = (href.includes('://') ? '' : this.href()) + href.slice(1);
|
||||
|
||||
return !overrides.some(href => fullUrl.includes(href))
|
||||
&& !robots.isAllowed(fullUrl, req.getHeader('user-agent'));
|
||||
}
|
||||
|
|
|
@ -223,6 +223,9 @@ Skin.prototype.compare_action = function() {
|
|||
res.push();
|
||||
var param = {}, leftLineNumber = rightLineNumber = 0;
|
||||
for (let line of diff) {
|
||||
if (!line) {
|
||||
continue;
|
||||
}
|
||||
if (line.deleted) {
|
||||
param.right = encode(line.value);
|
||||
param.leftStatus = 'added';
|
||||
|
@ -249,7 +252,7 @@ Skin.prototype.compare_action = function() {
|
|||
this.renderSkin('$Skin#difference', param);
|
||||
}
|
||||
}
|
||||
if (line.value !== null) {
|
||||
if (line.value !== null && typeof line.value !== 'undefined') {
|
||||
leftLineNumber += 1;
|
||||
rightLineNumber += 1;
|
||||
param.leftLineNumber = leftLineNumber;
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
antville.url = jdbc:h2:./db/antville;ifexists=true;mode=postgresql
|
||||
antville.url = jdbc:h2:./db/antville;ifexists=true;mode=postgresql;non_keywords=value
|
||||
antville.driver = org.h2.Driver
|
||||
antville.user = antville
|
||||
antville.password = antville
|
||||
|
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Binary file not shown.
2
gradle/wrapper/gradle-wrapper.properties
vendored
2
gradle/wrapper/gradle-wrapper.properties
vendored
|
@ -1,6 +1,6 @@
|
|||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.1-bin.zip
|
||||
networkTimeout=10000
|
||||
validateDistributionUrl=true
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
|
|
4
gradlew
vendored
4
gradlew
vendored
|
@ -114,7 +114,7 @@ case "$( uname )" in #(
|
|||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
CLASSPATH="\\\"\\\""
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
|
@ -213,7 +213,7 @@ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
|||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
-jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
|
||||
"$@"
|
||||
|
||||
# Stop when "xargs" is not available.
|
||||
|
|
4
gradlew.bat
vendored
4
gradlew.bat
vendored
|
@ -70,11 +70,11 @@ goto fail
|
|||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
set CLASSPATH=
|
||||
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %*
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
|
|
File diff suppressed because it is too large
Load diff
344
i18n/de.po
344
i18n/de.po
File diff suppressed because it is too large
Load diff
|
@ -125,6 +125,7 @@ global.messages['de'] = {
|
|||
"Edit Poll": "Umfrage bearbeiten",
|
||||
"Edit Story": "Beitrag bearbeiten",
|
||||
"Edit the filter in the site settings.": "Der Filter kann in den Einstellungen bearbeitet werden.",
|
||||
"Edit the rules in the <a href=\"{0}Site/robots/edit\">robots.txt</a> skin.": "Bearbeiten Sie die Regeln im <a href=\"{0}Site/robots/edit\">robots.txt</a>-Skin.",
|
||||
"Edit {0}.{1}": "{0}.{1} bearbeiten",
|
||||
"Enabled": "Aktiviert",
|
||||
"Enter one filter {0}pattern{1} per line to be applied on every URL in the referrer and backlink lists.": "Geben Sie ein {0}Filter-Schema{1} pro Zeile ein, das für jede Adresse in den Rückverweis-Listen angewendet werden soll.",
|
||||
|
@ -290,6 +291,7 @@ global.messages['de'] = {
|
|||
"Resource type (e.g. Story or Comment)": "Art der Ressource (z.B. Beitrag oder Kommentar)",
|
||||
"Restricted": "Eingeschränkt",
|
||||
"Results": "Ergebnis",
|
||||
"Robot rules": "Regeln für Robots",
|
||||
"Role": "Rolle",
|
||||
"Running": "Laufende",
|
||||
"Running Polls": "Laufende Umfragen",
|
||||
|
@ -359,6 +361,7 @@ global.messages['de'] = {
|
|||
"Terms and Conditions": "Nutzungsbedingungen",
|
||||
"Text": "Text",
|
||||
"Thanks, your vote was registered. You can change your mind until the poll is closed.": "Danke, Ihre Stimme wurde gezählt. Bis die Umfrage beendet ist, können Sie Ihre Meinung jederzeit ändern.",
|
||||
"The <a href=\"{0}\">robots.txt</a> file disallows access to this page.": "Die <a href=\"{0}\">robots.txt</a>-Datei verbietet den Zugriff auf diese Seite.",
|
||||
"The Management": "Die Direktion",
|
||||
"The URL endpoint for each of these APIs is located at": "Die Internet-Adresse für jede dieser Schnittstellen lautet",
|
||||
"The account data will be available for download from here within the next days.": "Die Kontodaten stehen demnächst hier zum Download bereit.",
|
||||
|
@ -529,6 +532,8 @@ global.messages['de'] = {
|
|||
"e-mail": "E-Mail",
|
||||
"e.g. {0}": "z.B. {0}",
|
||||
"enabled": "aktiviert",
|
||||
"enforce": "erzwingen",
|
||||
"enforced": "erzwingen",
|
||||
"export": "Exportieren",
|
||||
"featured": "sichtbar",
|
||||
"file": "Datei",
|
||||
|
@ -578,6 +583,7 @@ global.messages['de'] = {
|
|||
"soon": "in Kürze",
|
||||
"stories": "Beiträge",
|
||||
"story": "Beitrag",
|
||||
"suggest": "vorschlagen",
|
||||
"tag": "Stichwort",
|
||||
"tags": "Stichworte",
|
||||
"tomorrow": "morgen",
|
||||
|
|
164
package-lock.json
generated
164
package-lock.json
generated
|
@ -28,24 +28,24 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
"version": "7.26.2",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
|
||||
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz",
|
||||
"integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-validator-identifier": "^7.25.9",
|
||||
"@babel/helper-validator-identifier": "^7.27.1",
|
||||
"js-tokens": "^4.0.0",
|
||||
"picocolors": "^1.0.0"
|
||||
"picocolors": "^1.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/helper-string-parser": {
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
|
||||
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
|
||||
"integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
|
@ -53,9 +53,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@babel/helper-validator-identifier": {
|
||||
"version": "7.25.9",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
|
||||
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz",
|
||||
"integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
|
@ -63,13 +63,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@babel/parser": {
|
||||
"version": "7.27.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.0.tgz",
|
||||
"integrity": "sha512-iaepho73/2Pz7w2eMS0Q5f83+0RKI7i4xmiYeBmDzfRVbQtTOG7Ts0S4HzJVsTMGI9keU8rNfuZr8DKfSt7Yyg==",
|
||||
"version": "7.27.2",
|
||||
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.2.tgz",
|
||||
"integrity": "sha512-QYLs8299NA7WM/bZAdp+CviYYkVoYXlDW2rzliy3chxd1PQjej7JORuMJDJXJUb9g0TT+B99EwaVLKmX+sPXWw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/types": "^7.27.0"
|
||||
"@babel/types": "^7.27.1"
|
||||
},
|
||||
"bin": {
|
||||
"parser": "bin/babel-parser.js"
|
||||
|
@ -79,14 +79,14 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@babel/types": {
|
||||
"version": "7.27.0",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.0.tgz",
|
||||
"integrity": "sha512-H45s8fVLYjbhFH62dIJ3WtmJ6RSPt/3DRO0ZcT2SUiYiQyz3BLVb9ADEnLl91m74aQPS3AzzeajZHYOalWe3bg==",
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.1.tgz",
|
||||
"integrity": "sha512-+EzkxvLNfiUeKMgy/3luqfsCWFRXLb7U6wNQTk60tovuckwB15B191tJWvpp4HjiQWdJkCxO3Wbvc6jlk3Xb2Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/helper-string-parser": "^7.25.9",
|
||||
"@babel/helper-validator-identifier": "^7.25.9"
|
||||
"@babel/helper-string-parser": "^7.27.1",
|
||||
"@babel/helper-validator-identifier": "^7.27.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
|
@ -214,9 +214,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@npmcli/arborist": {
|
||||
"version": "9.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@npmcli/arborist/-/arborist-9.0.2.tgz",
|
||||
"integrity": "sha512-9z5FgIYd62LxcuCF2BAXnsEo059pGoPv/1E3XkrKBlB9kOQnJ6WSsyOjuGIcZfLAXseamyAif2J7yAVkWNdWzA==",
|
||||
"version": "9.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@npmcli/arborist/-/arborist-9.1.1.tgz",
|
||||
"integrity": "sha512-dtANj0Y757hrIDBfylk6neUzMi2yOX0+jK/YjwKrjSMOzis/o8APRfo6VCKL9hhodAeBW72xD65aN9gPzwQz8Q==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
|
@ -387,9 +387,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@npmcli/package-json": {
|
||||
"version": "6.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@npmcli/package-json/-/package-json-6.1.1.tgz",
|
||||
"integrity": "sha512-d5qimadRAUCO4A/Txw71VM7UrRZzV+NPclxz/dc+M6B2oYwjWTjqh8HA/sGQgs9VZuJ6I/P7XIAlJvgrl27ZOw==",
|
||||
"version": "6.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@npmcli/package-json/-/package-json-6.2.0.tgz",
|
||||
"integrity": "sha512-rCNLSB/JzNvot0SEyXqWZ7tX2B5dD2a1br2Dp0vSYVo5jh8Z0EZ7lS9TsZ1UtziddB1UfNUaMCc538/HztnJGA==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
|
@ -453,9 +453,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@npmcli/redact": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@npmcli/redact/-/redact-3.2.0.tgz",
|
||||
"integrity": "sha512-NyJXHoZwJE0iUsCDTclXf1bWHJTsshtnp5xUN6F2vY+OLJv6d2cNc4Do6fKNkmPToB0GzoffxRh405ibTwG+Og==",
|
||||
"version": "3.2.2",
|
||||
"resolved": "https://registry.npmjs.org/@npmcli/redact/-/redact-3.2.2.tgz",
|
||||
"integrity": "sha512-7VmYAmk4csGv08QzrDKScdzn11jHPFGyqJW39FyPgPuAp3zIaUmuCo1yxw9aGs+NEJuTGQ9Gwqpt93vtJubucg==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"engines": {
|
||||
|
@ -515,9 +515,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@sigstore/protobuf-specs": {
|
||||
"version": "0.4.1",
|
||||
"resolved": "https://registry.npmjs.org/@sigstore/protobuf-specs/-/protobuf-specs-0.4.1.tgz",
|
||||
"integrity": "sha512-7MJXQhIm7dWF9zo7rRtMYh8d2gSnc3+JddeQOTIg6gUN7FjcuckZ9EwGq+ReeQtbbl3Tbf5YqRrWxA1DMfIn+w==",
|
||||
"version": "0.4.2",
|
||||
"resolved": "https://registry.npmjs.org/@sigstore/protobuf-specs/-/protobuf-specs-0.4.2.tgz",
|
||||
"integrity": "sha512-F2ye+n1INNhqT0MW+LfUEvTUPc/nS70vICJcxorKl7/gV9CO39+EDCw+qHNKEqvsDWk++yGVKCbzK1qLPvmC8g==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
|
@ -543,13 +543,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@sigstore/tuf": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@sigstore/tuf/-/tuf-3.1.0.tgz",
|
||||
"integrity": "sha512-suVMQEA+sKdOz5hwP9qNcEjX6B45R+hFFr4LAWzbRc5O+U2IInwvay/bpG5a4s+qR35P/JK/PiKiRGjfuLy1IA==",
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@sigstore/tuf/-/tuf-3.1.1.tgz",
|
||||
"integrity": "sha512-eFFvlcBIoGwVkkwmTi/vEQFSva3xs5Ot3WmBcjgjVdiaoelBLQaQ/ZBfhlG0MnG0cmTYScPpk7eDdGDWUcFUmg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@sigstore/protobuf-specs": "^0.4.0",
|
||||
"@sigstore/protobuf-specs": "^0.4.1",
|
||||
"tuf-js": "^3.0.1"
|
||||
},
|
||||
"engines": {
|
||||
|
@ -557,15 +557,15 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@sigstore/verify": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@sigstore/verify/-/verify-2.1.0.tgz",
|
||||
"integrity": "sha512-kAAM06ca4CzhvjIZdONAL9+MLppW3K48wOFy1TbuaWFW/OMfl8JuTgW0Bm02JB1WJGT/ET2eqav0KTEKmxqkIA==",
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@sigstore/verify/-/verify-2.1.1.tgz",
|
||||
"integrity": "sha512-hVJD77oT67aowHxwT4+M6PGOp+E2LtLdTK3+FC0lBO9T7sYwItDMXZ7Z07IDCvR1M717a4axbIWckrW67KMP/w==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@sigstore/bundle": "^3.1.0",
|
||||
"@sigstore/core": "^2.0.0",
|
||||
"@sigstore/protobuf-specs": "^0.4.0"
|
||||
"@sigstore/protobuf-specs": "^0.4.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.17.0 || >=20.5.0"
|
||||
|
@ -773,9 +773,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/asn1.js/node_modules/bn.js": {
|
||||
"version": "4.12.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
|
||||
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
|
||||
"version": "4.12.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
|
||||
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -941,9 +941,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/bn.js": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.1.tgz",
|
||||
"integrity": "sha512-eXRvHzWyYPBuB4NBy0cmYQjGitUrtqwbvlzP3G6VFnNRbsZQIxQ10PbKKHt8gZ/HW/D/747aDl+QkDqg3KQLMQ==",
|
||||
"version": "5.2.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.2.tgz",
|
||||
"integrity": "sha512-v2YAxEmKaBLahNwE1mjp4WON6huMNeuDvagFZW+ASCuA/ku0bXR9hSMw0XpiqMoA3+rmnyck/tPRSFQkoC9Cuw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -1621,9 +1621,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/create-ecdh/node_modules/bn.js": {
|
||||
"version": "4.12.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
|
||||
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
|
||||
"version": "4.12.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
|
||||
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -1742,9 +1742,9 @@
|
|||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz",
|
||||
"integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==",
|
||||
"version": "4.4.1",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz",
|
||||
"integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
@ -1876,9 +1876,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/diffie-hellman/node_modules/bn.js": {
|
||||
"version": "4.12.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
|
||||
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
|
||||
"version": "4.12.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
|
||||
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -1942,9 +1942,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/elliptic/node_modules/bn.js": {
|
||||
"version": "4.12.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
|
||||
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
|
||||
"version": "4.12.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
|
||||
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -2240,9 +2240,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/generate-license-file/node_modules/glob": {
|
||||
"version": "11.0.1",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-11.0.1.tgz",
|
||||
"integrity": "sha512-zrQDm8XPnYEKawJScsnM0QzobJxlT/kHOOlRTio8IH/GrmxRE5fjllkzdaHclIuNjUQTJYH2xHNIGfdpJkDJUw==",
|
||||
"version": "11.0.2",
|
||||
"resolved": "https://registry.npmjs.org/glob/-/glob-11.0.2.tgz",
|
||||
"integrity": "sha512-YT7U7Vye+t5fZ/QMkBFrTJ7ZQxInIUjwyAjVj84CYXqgBdv30MFUPGnBR6sQaVq6Is15wYJUsnzTuWaGRBhBAQ==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
|
@ -2264,9 +2264,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/generate-license-file/node_modules/jackspeak": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.1.0.tgz",
|
||||
"integrity": "sha512-9DDdhb5j6cpeitCbvLO7n7J4IxnbM6hoF6O1g4HQ5TfhvvKN8ywDM7668ZhMHRqVmxqhps/F6syWK2KcPxYlkw==",
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.1.1.tgz",
|
||||
"integrity": "sha512-zptv57P3GpL+O0I7VdMJNBZCu+BPHVQUk55Ft8/QCJjTVxrnJHuVuX/0Bl2A6/+2oyR/ZMEuFKwmzqqZ/U5nPQ==",
|
||||
"dev": true,
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
|
@ -2573,9 +2573,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/http-cache-semantics": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz",
|
||||
"integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==",
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz",
|
||||
"integrity": "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==",
|
||||
"dev": true,
|
||||
"license": "BSD-2-Clause"
|
||||
},
|
||||
|
@ -3453,9 +3453,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/miller-rabin/node_modules/bn.js": {
|
||||
"version": "4.12.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
|
||||
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
|
||||
"version": "4.12.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
|
||||
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -4555,9 +4555,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/public-encrypt/node_modules/bn.js": {
|
||||
"version": "4.12.1",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
|
||||
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
|
||||
"version": "4.12.2",
|
||||
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
|
||||
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
@ -4836,9 +4836,9 @@
|
|||
"optional": true
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.7.1",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.1.tgz",
|
||||
"integrity": "sha512-hlq8tAfn0m/61p4BVRcPzIGr6LKiMwo4VM6dGi6pt4qcRkmNzTcWq6eCEjEh+qXjkMDvPlOFFSGwQjoEa6gyMA==",
|
||||
"version": "7.7.2",
|
||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
|
||||
"integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"bin": {
|
||||
|
@ -5518,9 +5518,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.13",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.13.tgz",
|
||||
"integrity": "sha512-mEwzpUgrLySlveBwEVDMKk5B57bhLPYovRfPAXD5gA/98Opn0rCDj3GtLwFvCvH5RK9uPCExUROW5NjDwvqkxw==",
|
||||
"version": "0.2.14",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
|
||||
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
@ -6031,9 +6031,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "3.24.3",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz",
|
||||
"integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==",
|
||||
"version": "3.25.28",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.28.tgz",
|
||||
"integrity": "sha512-/nt/67WYKnr5by3YS7LroZJbtcCBurDKKPBPWWzaxvVCGuG/NOsiKkrjoOhI8mJ+SQUXEbUzeB3S+6XDUEEj7Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
|
|
912
tests/robots.js
Normal file
912
tests/robots.js
Normal file
|
@ -0,0 +1,912 @@
|
|||
// Unit tests of the robots parser
|
||||
// Source: <https://github.com/samclarke/robots-parser/blob/master/test/Robots.js>
|
||||
// Copyright (c) 2014 Sam Clarke
|
||||
// MIT License (MIT)
|
||||
|
||||
// Run with `npx nyc --reporter=text-summary --reporter=html --reporter=lcovonly mocha tests/robots.js`
|
||||
|
||||
// Set up the test environment with Antville’s version of the robots parser
|
||||
const Robots = require('../code/Global/Robots.js');
|
||||
const robotsParser = (url, contents) => new Robots(url, contents);
|
||||
|
||||
const { expect } = require('chai');
|
||||
|
||||
function testRobots(url, contents, allowed, disallowed) {
|
||||
var robots = robotsParser(url, contents);
|
||||
|
||||
allowed.forEach(function (url) {
|
||||
expect(robots.isAllowed(url)).to.equal(true);
|
||||
});
|
||||
|
||||
disallowed.forEach(function (url) {
|
||||
expect(robots.isDisallowed(url)).to.equal(true);
|
||||
});
|
||||
}
|
||||
|
||||
describe('Robots', function () {
|
||||
it('should parse the disallow directive', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /fish/',
|
||||
'Disallow: /test.html'
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should parse the allow directive', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /fish/',
|
||||
'Disallow: /test.html',
|
||||
'Allow: /fish/test.html',
|
||||
'Allow: /test.html'
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/fish/test.html',
|
||||
'http://www.example.com/Test.html',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should parse patterns', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /fish*.php',
|
||||
'Disallow: /*.dext$',
|
||||
'Disallow: /dir*'
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/Fish.PHP',
|
||||
'http://www.example.com/Fish.dext1',
|
||||
'http://www.example.com/folder/dir.html',
|
||||
'http://www.example.com/folder/dir/test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish.php',
|
||||
'http://www.example.com/fishheads/catfish.php?parameters',
|
||||
'http://www.example.com/AnYthInG.dext',
|
||||
'http://www.example.com/Fish.dext.dext',
|
||||
'http://www.example.com/dir/test.html',
|
||||
'http://www.example.com/directory.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should have the correct order precedence for allow and disallow', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /fish*.php',
|
||||
'Allow: /fish/index.php',
|
||||
'Disallow: /test',
|
||||
'Allow: /test/',
|
||||
'Disallow: /aa/',
|
||||
'Allow: /aa/',
|
||||
'Allow: /bb/',
|
||||
'Disallow: /bb/',
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/test/index.html',
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/test/',
|
||||
'http://www.example.com/aa/',
|
||||
'http://www.example.com/bb/',
|
||||
'http://www.example.com/x/'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish.php',
|
||||
'http://www.example.com/fishheads/catfish.php?parameters',
|
||||
'http://www.example.com/test'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should have the correct order precedence for wildcards', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /*/',
|
||||
'Allow: /x/',
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/x/',
|
||||
'http://www.example.com/fish.php',
|
||||
'http://www.example.com/test'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/a/',
|
||||
'http://www.example.com/xx/',
|
||||
'http://www.example.com/test/index.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should parse lines delimitated by \\r', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /fish/',
|
||||
'Disallow: /test.html'
|
||||
].join('\r');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should parse lines delimitated by \\r\\n', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /fish/',
|
||||
'Disallow: /test.html'
|
||||
].join('\r\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
|
||||
it('should parse lines delimitated by mixed line endings', function () {
|
||||
var contents = [
|
||||
'User-agent: *\r',
|
||||
'Disallow: /fish/\r\n',
|
||||
'Disallow: /test.html\n\n'
|
||||
].join('');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should ignore rules that are not in a group', function () {
|
||||
var contents = [
|
||||
'Disallow: /secret.html',
|
||||
'Disallow: /test',
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/secret.html',
|
||||
'http://www.example.com/test/index.html',
|
||||
'http://www.example.com/test/'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, []);
|
||||
});
|
||||
|
||||
|
||||
it('should ignore comments', function () {
|
||||
var contents = [
|
||||
'#',
|
||||
'# This is a comment',
|
||||
'#',
|
||||
'User-agent: *',
|
||||
'# This is a comment',
|
||||
'Disallow: /fish/ # ignore',
|
||||
'# Disallow: fish',
|
||||
'Disallow: /test.html'
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should ignore invalid lines', function () {
|
||||
var contents = [
|
||||
'invalid line',
|
||||
'User-agent: *',
|
||||
'Disallow: /fish/',
|
||||
':::::another invalid line:::::',
|
||||
'Disallow: /test.html',
|
||||
'Unknown: tule'
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should ignore empty user-agent lines', function () {
|
||||
var contents = [
|
||||
'User-agent:',
|
||||
'Disallow: /fish/',
|
||||
'Disallow: /test.html'
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.example.com/fish',
|
||||
'http://www.example.com/Test.html',
|
||||
'http://www.example.com/fish/index.php',
|
||||
'http://www.example.com/fish/',
|
||||
'http://www.example.com/test.html'
|
||||
];
|
||||
|
||||
var disallowed = [];
|
||||
|
||||
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should support groups with multiple user agents (case insensitive)', function () {
|
||||
var contents = [
|
||||
'User-agent: agenta',
|
||||
'User-agent: agentb',
|
||||
'Disallow: /fish',
|
||||
].join('\n');
|
||||
|
||||
var robots = robotsParser('http://www.example.com/robots.txt', contents);
|
||||
|
||||
expect(robots.isAllowed("http://www.example.com/fish", "agenta")).to.equal(false);
|
||||
});
|
||||
|
||||
it('should return undefined for invalid urls', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /secret.html',
|
||||
'Disallow: /test',
|
||||
].join('\n');
|
||||
|
||||
var invalidUrls = [
|
||||
'http://example.com/secret.html',
|
||||
'http://ex ample.com/secret.html',
|
||||
'http://www.example.net/test/index.html',
|
||||
'http://www.examsple.com/test/',
|
||||
'example.com/test/',
|
||||
':::::;;`\\|/.example.com/test/'
|
||||
];
|
||||
|
||||
var robots = robotsParser('http://www.example.com/robots.txt', contents);
|
||||
|
||||
invalidUrls.forEach(function (url) {
|
||||
expect(robots.isAllowed(url)).to.equal(undefined);
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle Unicode, urlencoded and punycode URLs', function () {
|
||||
var contents = [
|
||||
'User-agent: *',
|
||||
'Disallow: /secret.html',
|
||||
'Disallow: /test',
|
||||
].join('\n');
|
||||
|
||||
var allowed = [
|
||||
'http://www.münich.com/index.html',
|
||||
'http://www.xn--mnich-kva.com/index.html',
|
||||
'http://www.m%C3%BCnich.com/index.html'
|
||||
];
|
||||
|
||||
var disallowed = [
|
||||
'http://www.münich.com/secret.html',
|
||||
'http://www.xn--mnich-kva.com/secret.html',
|
||||
'http://www.m%C3%BCnich.com/secret.html'
|
||||
];
|
||||
|
||||
testRobots('http://www.münich.com/robots.txt', contents, allowed, disallowed);
|
||||
testRobots('http://www.xn--mnich-kva.com/robots.txt', contents, allowed, disallowed);
|
||||
testRobots('http://www.m%C3%BCnich.com/robots.txt', contents, allowed, disallowed);
|
||||
});
|
||||
|
||||
it('should handle Unicode and urlencoded paths', function () {
  // Percent-encoded and literal Unicode paths must match the same rules;
  // invalid escapes such as %a are matched literally.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /%CF%80',
    'Disallow: /%e2%9d%83',
    'Disallow: /%a%a',
    'Disallow: /💩',
    'Disallow: /✼*t$',
    'Disallow: /%E2%9C%A4*t$',
    'Disallow: /✿%a',
    'Disallow: /http%3A%2F%2Fexample.org'
  ].join('\n');

  const allowedUrls = [
    'http://www.example.com/✼testing',
    'http://www.example.com/%E2%9C%BCtesting',
    'http://www.example.com/✤testing',
    'http://www.example.com/%E2%9C%A4testing',
    'http://www.example.com/http://example.org',
    'http://www.example.com/http:%2F%2Fexample.org'
  ];

  const disallowedUrls = [
    'http://www.example.com/%CF%80',
    'http://www.example.com/%CF%80/index.html',
    'http://www.example.com/π',
    'http://www.example.com/π/index.html',
    'http://www.example.com/%e2%9d%83',
    'http://www.example.com/%E2%9D%83/index.html',
    'http://www.example.com/❃',
    'http://www.example.com/❃/index.html',
    'http://www.example.com/%F0%9F%92%A9',
    'http://www.example.com/%F0%9F%92%A9/index.html',
    'http://www.example.com/💩',
    'http://www.example.com/💩/index.html',
    'http://www.example.com/%a%a',
    'http://www.example.com/%a%a/index.html',
    'http://www.example.com/✼test',
    'http://www.example.com/%E2%9C%BCtest',
    'http://www.example.com/✤test',
    'http://www.example.com/%E2%9C%A4testt',
    'http://www.example.com/✿%a',
    'http://www.example.com/%E2%9C%BF%atest',
    'http://www.example.com/http%3A%2F%2Fexample.org'
  ];

  testRobots('http://www.example.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});
|
||||
|
||||
it('should handle lone high / low surrogates', function () {
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /\uD800',
    'Disallow: /\uDC00'
  ].join('\n');

  // Lone surrogates make these URLs invalid, so they can never match a
  // disallow rule and therefore fall through as allowed.
  const allowedUrls = [
    'http://www.example.com/\uDC00',
    'http://www.example.com/\uD800'
  ];

  testRobots('http://www.example.com/robots.txt', robotsTxt, allowedUrls, []);
});

it('should ignore host case', function () {
  // Host comparison is case-insensitive, both for the robots.txt URL
  // and for the URLs being checked.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /secret.html',
    'Disallow: /test',
  ].join('\n');

  const allowedUrls = [
    'http://www.example.com/index.html',
    'http://www.ExAmPlE.com/index.html',
    'http://www.EXAMPLE.com/index.html'
  ];

  const disallowedUrls = [
    'http://www.example.com/secret.html',
    'http://www.ExAmPlE.com/secret.html',
    'http://www.EXAMPLE.com/secret.html'
  ];

  testRobots('http://www.eXample.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});
|
||||
|
||||
it('should handle relative paths', function () {
  // A relative robots.txt URL pairs with relative query URLs.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish',
    'Allow: /fish/test',
  ].join('\n');

  const robots = robotsParser('/robots.txt', robotsTxt);

  expect(robots.isAllowed('/fish/test')).to.equal(true);
  expect(robots.isAllowed('/fish')).to.equal(false);
});

it('should not allow relative paths if domain specified', function () {
  // With an absolute robots.txt URL, relative query URLs cannot be
  // attributed to the host, so the result is indeterminate.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish',
    'Allow: /fish/test',
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.isAllowed('/fish/test')).to.equal(undefined);
  expect(robots.isAllowed('/fish')).to.equal(undefined);
});

it('should not treat invalid robots.txt URLs as relative', function () {
  // The space makes this URL invalid; it must not be reinterpreted as
  // a relative path.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish',
    'Allow: /fish/test',
  ].join('\n');

  const robots = robotsParser('https://ex ample.com/robots.txt', robotsTxt);

  expect(robots.isAllowed('/fish/test')).to.equal(undefined);
  expect(robots.isAllowed('/fish')).to.equal(undefined);
});

it('should not allow URls if domain specified and robots.txt is relative', function () {
  // The converse case: relative robots.txt, absolute query URLs.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish',
    'Allow: /fish/test',
  ].join('\n');

  const robots = robotsParser('/robots.txt', robotsTxt);

  expect(robots.isAllowed('http://www.example.com/fish/test')).to.equal(undefined);
  expect(robots.isAllowed('http://www.example.com/fish')).to.equal(undefined);
});
|
||||
|
||||
it('should allow all if empty robots.txt', function () {
  // An empty file contains no rules, so every URL is allowed.
  const robots = robotsParser('http://www.example.com/robots.txt', '');

  [
    'http://www.example.com/secret.html',
    'http://www.example.com/test/index.html',
    'http://www.example.com/test/'
  ].forEach(function (url) {
    expect(robots.isAllowed(url)).to.equal(true);
  });
});

it('should treat null as allowing all', function () {
  const robots = robotsParser('http://www.example.com/robots.txt', null);

  expect(robots.isAllowed('http://www.example.com/', 'userAgent')).to.equal(true);
  expect(robots.isAllowed('http://www.example.com/')).to.equal(true);
});
|
||||
|
||||
it('should handle invalid robots.txt urls', function () {
  const robotsTxt = [
    'user-agent: *',
    'disallow: /',

    'host: www.example.com',
    'sitemap: /sitemap.xml'
  ].join('\n');

  // None of these identify a host, so per-URL checks are indeterminate
  // while host and sitemap directives must still be parsed.
  [undefined, null, 'null', ':/wom/test/'].forEach(function (robotsUrl) {
    const robots = robotsParser(robotsUrl, robotsTxt);

    expect(robots.isAllowed('http://www.example.com/index.html')).to.equal(undefined);
    expect(robots.getPreferredHost()).to.equal('www.example.com');
    expect(robots.getSitemaps()).to.eql(['/sitemap.xml']);
  });
});
|
||||
|
||||
it('should parse the crawl-delay directive', function () {
  const robotsTxt = [
    'user-agent: a',
    'crawl-delay: 1',

    'user-agent: b',
    'disallow: /d',

    'user-agent: c',
    'user-agent: d',
    'crawl-delay: 10'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  // c and d share one group and therefore the same delay; b has rules
  // but no delay; no UA at all yields undefined.
  expect(robots.getCrawlDelay('a')).to.equal(1);
  expect(robots.getCrawlDelay('b')).to.equal(undefined);
  expect(robots.getCrawlDelay('c')).to.equal(10);
  expect(robots.getCrawlDelay('d')).to.equal(10);
  expect(robots.getCrawlDelay()).to.equal(undefined);
});

it('should ignore invalid crawl-delay directives', function () {
  // Values that are not plain numbers must be discarded.
  const robotsTxt = [
    'user-agent: a',
    'crawl-delay: 1.2.1',

    'user-agent: b',
    'crawl-delay: 1.a0',

    'user-agent: c',
    'user-agent: d',
    'crawl-delay: 10a'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  ['a', 'b', 'c', 'd'].forEach(function (userAgent) {
    expect(robots.getCrawlDelay(userAgent)).to.equal(undefined);
  });
});
|
||||
|
||||
it('should parse the sitemap directive', function () {
  // Sitemaps are global (not tied to a UA group) and values are trimmed.
  const robotsTxt = [
    'user-agent: a',
    'crawl-delay: 1',
    'sitemap: http://example.com/test.xml',

    'user-agent: b',
    'disallow: /d',

    'sitemap: /sitemap.xml',
    'sitemap: http://example.com/test/sitemap.xml '
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getSitemaps()).to.eql([
    'http://example.com/test.xml',
    '/sitemap.xml',
    'http://example.com/test/sitemap.xml'
  ]);
});

it('should parse the host directive', function () {
  const robotsTxt = [
    'user-agent: a',
    'crawl-delay: 1',
    'host: www.example.net',

    'user-agent: b',
    'disallow: /d',

    'host: example.com'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getPreferredHost()).to.equal('example.com');
});
|
||||
|
||||
it('should parse empty and invalid directives', function () {
  // Only checks that malformed lines do not make the parser throw.
  const robotsTxt = [
    'user-agent:',
    'user-agent:::: a::',
    'crawl-delay:',
    'crawl-delay:::: 0:',
    'host:',
    'host:: example.com',
    'sitemap:',
    'sitemap:: site:map.xml',
    'disallow:',
    'disallow::: /:',
    'allow:',
    'allow::: /:',
  ].join('\n');

  robotsParser('http://www.example.com/robots.txt', robotsTxt);
});

it('should treat only the last host directive as valid', function () {
  const robotsTxt = [
    'user-agent: a',
    'crawl-delay: 1',
    'host: www.example.net',

    'user-agent: b',
    'disallow: /d',

    'host: example.net',
    'host: example.com'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getPreferredHost()).to.equal('example.com');
});

it('should return null when there is no host directive', function () {
  const robotsTxt = [
    'user-agent: a',
    'crawl-delay: 1',

    'user-agent: b',
    'disallow: /d',
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getPreferredHost()).to.equal(null);
});
|
||||
|
||||
it('should fallback to * when a UA has no rules of its own', function () {
  const robotsTxt = [
    'user-agent: *',
    'crawl-delay: 1',

    'user-agent: b',
    'crawl-delay: 12',

    'user-agent: c',
    'user-agent: d',
    'crawl-delay: 10'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  // Unknown UAs (including near-misses like "dd") use the * group.
  expect(robots.getCrawlDelay('should-fall-back')).to.equal(1);
  expect(robots.getCrawlDelay('d')).to.equal(10);
  expect(robots.getCrawlDelay('dd')).to.equal(1);
});

it('should not fallback to * when a UA has rules', function () {
  // An empty disallow still counts as "has rules", suppressing fallback.
  const robotsTxt = [
    'user-agent: *',
    'crawl-delay: 1',

    'user-agent: b',
    'disallow:'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getCrawlDelay('b')).to.equal(undefined);
});

it('should handle UAs with object property names', function () {
  // Guards against prototype-pollution-style lookups on the UA map.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish',
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.isAllowed('http://www.example.com/fish', 'constructor')).to.equal(false);
  expect(robots.isAllowed('http://www.example.com/fish', '__proto__')).to.equal(false);
});

it('should ignore version numbers in the UA string', function () {
  const robotsTxt = [
    'user-agent: *',
    'crawl-delay: 1',

    'user-agent: b',
    'crawl-delay: 12',

    'user-agent: c',
    'user-agent: d',
    'crawl-delay: 10'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getCrawlDelay('should-fall-back/1.0.0')).to.equal(1);
  expect(robots.getCrawlDelay('d/12')).to.equal(10);
  expect(robots.getCrawlDelay('dd / 0-32-3')).to.equal(1);
  expect(robots.getCrawlDelay('b / 1.0')).to.equal(12);
});
|
||||
|
||||
|
||||
it('should return the line number of the matching directive', function () {
  // Line numbers are 1-based; -1 means no directive matched.
  const robotsTxt = [
    '',
    'User-agent: *',
    '',
    'Disallow: /fish/',
    'Disallow: /test.html',
    'Allow: /fish/test.html',
    'Allow: /test.html',
    '',
    'User-agent: a',
    'allow: /',
    '',
    'User-agent: b',
    'disallow: /test',
    'disallow: /t*t',
    '',
    'User-agent: c',
    'Disallow: /fish*.php',
    'Allow: /fish/index.php'
  ].join('\n');

  const robots = robotsParser('http://www.example.com/robots.txt', robotsTxt);

  expect(robots.getMatchingLineNumber('http://www.example.com/fish')).to.equal(-1);
  expect(robots.getMatchingLineNumber('http://www.example.com/fish/test.html')).to.equal(6);
  expect(robots.getMatchingLineNumber('http://www.example.com/Test.html')).to.equal(-1);

  expect(robots.getMatchingLineNumber('http://www.example.com/fish/index.php')).to.equal(4);
  expect(robots.getMatchingLineNumber('http://www.example.com/fish/')).to.equal(4);
  expect(robots.getMatchingLineNumber('http://www.example.com/test.html')).to.equal(7);

  expect(robots.getMatchingLineNumber('http://www.example.com/test.html', 'a')).to.equal(10);

  expect(robots.getMatchingLineNumber('http://www.example.com/fish.php', 'c')).to.equal(17);
  expect(robots.getMatchingLineNumber('http://www.example.com/fish/index.php', 'c')).to.equal(18);
});
|
||||
|
||||
it('should handle large wildcards efficiently', function () {
  // Guards against catastrophic backtracking when a rule contains a
  // huge run of consecutive wildcards.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /' + '*'.repeat(2048) + '.html',
  ].join('\n');

  const allowedUrls = [
    'http://www.example.com/' + 'sub'.repeat(2048) + 'folder/index.php',
  ];

  const disallowedUrls = [
    'http://www.example.com/secret.html'
  ];

  const start = Date.now();
  testRobots('http://www.eXample.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
  const elapsed = Date.now() - start;

  // Should take less than 500 ms (high to allow for variableness of
  // machines running the test, should normally be much less)
  expect(elapsed).to.be.lessThan(500);
});
|
||||
|
||||
it('should honor given port number', function () {
  // Rules only apply to URLs on the same port as the robots.txt itself.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  const allowedUrls = [
    'http://www.example.com:8080/fish',
    'http://www.example.com:8080/Test.html'
  ];

  const disallowedUrls = [
    'http://www.example.com/fish',
    'http://www.example.com/Test.html',
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html'
  ];

  testRobots('http://www.example.com:8080/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});

it('should default to port 80 for http: if no port given', function () {
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  const allowedUrls = [
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html'
  ];

  const disallowedUrls = [
    'http://www.example.com:443/fish',
    'http://www.example.com:443/Test.html',
    'http://www.example.com:80/fish/index.php',
    'http://www.example.com:80/fish/',
    'http://www.example.com:80/test.html'
  ];

  testRobots('http://www.example.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});

it('should default to port 443 for https: if no port given', function () {
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  const allowedUrls = [
    'https://www.example.com:443/fish',
    'https://www.example.com:443/Test.html',
    'https://www.example.com/fish',
    'https://www.example.com/Test.html'
  ];

  const disallowedUrls = [
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html',
    'http://www.example.com:443/fish/index.php',
    'http://www.example.com:443/fish/',
    'http://www.example.com:443/test.html'
  ];

  testRobots('https://www.example.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});
|
||||
|
||||
it('should not be disallowed when wildcard is used in explicit mode', function () {
  // In explicit mode the * group must NOT apply to a named user agent.
  const robotsTxt = [
    'User-agent: *',
    'Disallow: /',
  ].join('\n');

  const url = 'https://www.example.com/hello';
  const robots = robotsParser(url, robotsTxt);

  expect(robots.isExplicitlyDisallowed(url, 'SomeBot')).to.equal(false);
});

it('should be disallowed when user agent equal robots rule in explicit mode', function () {
  // A group addressing the UA by name does apply in explicit mode.
  const robotsTxt = [
    'User-agent: SomeBot',
    'Disallow: /',
  ].join('\n');

  const url = 'https://www.example.com/hello';
  const robots = robotsParser(url, robotsTxt);

  expect(robots.isExplicitlyDisallowed(url, 'SomeBot')).to.equal(true);
});

it('should return undefined when given an invalid URL in explicit mode', function () {
  // The checked URL is on a different origin than the robots.txt,
  // so the result is indeterminate.
  const robotsTxt = [
    'User-agent: SomeBot',
    'Disallow: /',
  ].join('\n');

  const url = 'https://www.example.com/hello';
  const robots = robotsParser('http://example.com', robotsTxt);

  expect(robots.isExplicitlyDisallowed(url, 'SomeBot')).to.equal(undefined);
});
|
||||
});
|
|
@ -11,7 +11,7 @@ clean {
|
|||
}
|
||||
|
||||
dependencies {
|
||||
implementation 'com.h2database:h2:1.4.200'
|
||||
implementation 'com.h2database:h2:2.2.220'
|
||||
}
|
||||
|
||||
configurations {
|
||||
|
@ -112,9 +112,8 @@ tasks.register('installJars', Copy) {
|
|||
tasks.register('buildDatabase', JavaExec) {
|
||||
description 'Builds the H2 SQL database file.'
|
||||
group 'build'
|
||||
dependsOn ':assemble'
|
||||
|
||||
def inputFile = "$antvilleInstallDir/db/postgre.sql"
|
||||
def inputFile = "${project.rootDir}/db/postgre.sql"
|
||||
def outputDir = "$helmaInstallDir/db"
|
||||
|
||||
inputs.file inputFile
|
||||
|
@ -130,7 +129,7 @@ tasks.register('buildDatabase', JavaExec) {
|
|||
args = [
|
||||
'-continueOnError',
|
||||
'-script', inputFile,
|
||||
'-url', "jdbc:h2:$outputDir/antville",
|
||||
'-url', "jdbc:h2:$outputDir/antville;mode=postgresql;non_keywords=value",
|
||||
'-user', 'antville',
|
||||
'-password', 'antville'
|
||||
]
|
||||
|
@ -165,7 +164,7 @@ task assembleDist {
|
|||
tasks.register('runH2Console', JavaExec) {
|
||||
description 'Runs the H2 SQL database console.'
|
||||
group 'Help'
|
||||
dependsOn 'installJars'
|
||||
dependsOn 'buildDatabase'
|
||||
|
||||
def h2Jar = configurations.library.files.find { jar ->
|
||||
jar.name.startsWith('h2')
|
||||
|
@ -178,9 +177,6 @@ tasks.register('runH2Console', JavaExec) {
|
|||
'-user', 'antville',
|
||||
'-password', 'antville'
|
||||
]
|
||||
|
||||
// standardOutput = new ByteArrayOutputStream()
|
||||
// ignoreExitValue true
|
||||
}
|
||||
|
||||
tasks.register('distZip', Zip) {
|
||||
|
|
|
@ -7,3 +7,8 @@
|
|||
summary {
|
||||
display: revert;
|
||||
}
|
||||
|
||||
// TODO: Actually a compatibility fix. See https://tobi.antville.org/stories/2317931/
|
||||
td[nowrap][width]:not([width$="%"]) {
|
||||
white-space: normal
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue