Compare commits

..

27 commits

Author SHA1 Message Date
77c99be343 Update dependency org.jsoup:jsoup to v1.21.1 2025-06-23 05:42:26 +00:00
87640ed8da Prevent “undefined” showing up in skin diff 2025-06-01 13:00:46 +00:00
ad4e35f452 Prevent exception when a diff line is falsy 2025-06-01 12:41:24 +00:00
ae79a59639
Remove stale code in comments 2025-05-30 22:24:15 +02:00
354c9788c8
Make it less cumbersome to build/run the H2 database/console 2025-05-30 22:24:15 +02:00
67573db0e0
Add work-around allowing the reserved keyword “value” as identifier
See <https://www.h2database.com/html/commands.html#set_non_keywords>
2025-05-30 22:24:15 +02:00
22013f2267 Update dependency com.h2database:h2 to v2 [SECURITY] 2025-05-29 22:42:28 +00:00
8370b2ba98
Create more useful links from version info 2025-05-29 23:57:04 +02:00
144d2475cb
Prevent error when data.type is undefined in proxy claustra 2025-05-28 23:04:26 +02:00
96ae3ad7fb Lock file maintenance 2025-05-27 18:36:08 +00:00
a7fc1df893
Merge branch 'main' into automatically-set-root-cookie
# Conflicts:
#	.github/workflows/deploy.yml
2025-05-27 20:32:47 +02:00
5b929bf0ca
Add compatibility fix for <td> elements overflowing the layout 2025-05-27 20:29:59 +02:00
0b326e71e6
Add compatibility fix for CSS skins with <style> element 2025-05-27 20:29:59 +02:00
c6d86368c5 Add and translate missing translatable message 2025-05-27 18:24:53 +00:00
1d59aff5a6 Always grant access to robots.txt 2025-05-27 18:24:53 +00:00
50b5c4d09e Slightly reword robot rules setting and translate to German 2025-05-27 18:24:53 +00:00
42a0d7e610 Add routine to enforce rules in robots.txt if corresponding setting is enabled 2025-05-27 18:24:53 +00:00
c5b9a613a8 Add site setting for enforcing rules in robots.txt 2025-05-27 18:24:53 +00:00
362ca05ab8 Adapt robots parser and its tests for Rhino 2025-05-27 18:24:53 +00:00
a7cabf0d63 Add third-party robots parser, including unit tests 2025-05-27 18:24:53 +00:00
f25b3c0b76
Simplify repository workflows
Use deploy workflow for staging, too
2025-05-25 20:58:23 +02:00
543070a94f Implement conditional redirect to root site after login
This way the account can be logged in to the default domain, too
2025-05-25 15:50:34 +00:00
786a110b48 Lock file maintenance 2025-05-25 15:43:49 +00:00
348fa4607d Update dependency gradle to v8.14.1 2025-05-25 15:40:16 +00:00
f90d1e0667
Simplify repository workflows
Use deploy workflow for staging, too
2025-05-25 16:58:53 +02:00
e5216c6c5f
Update NVM configuration for compatibility with official implementation 2025-05-25 16:41:15 +02:00
6ed24ef0d2 Replace hard-coded hostname with input 2025-05-24 20:03:13 +00:00
25 changed files with 2087 additions and 469 deletions

View file

@ -1,15 +1,48 @@
name: Deploy (Production)
name: Deploy
on: workflow_dispatch
on:
workflow_dispatch:
inputs:
hostname:
description: Hostname
type: string
required: true
default: antville.org
jobs:
deploy:
stage:
runs-on: antville
environment:
name: antville.org
url: https://antville.org
name: production
url: ${{ inputs.hostname }}
steps:
- name: Copy files to production server
run: ssh staging-server deploy-antville
- uses: actions/checkout@v4
- name: Build with Gradle
run: ./gradlew :build
- name: Copy files to server
run: |
rsync ./build/install/antville/ ${{ inputs.hostname }}:./apps/antville/ \
--archive --compress --delete --verbose \
--filter '+ /claustra' \
--filter '+ /code' \
--filter '+ /compat' \
--filter '+ /db' \
--filter '+ /i18n' \
--filter '+ /lib' \
--filter '- /*'
rsync ./build/install/antville/static/ ${{ inputs.hostname }}:./apps/antville/static/ \
--archive --compress --verbose \
--filter '+ /fonts' \
--filter '+ /formica.html' \
--filter '+ /img' \
--filter '+ /scripts' \
--filter '+ /styles' \
--filter '- /*'
- name: Restart Helma
run: ssh ${{ inputs.hostname }} restart

View file

@ -1,42 +0,0 @@
name: Deploy (Staging)
on: workflow_dispatch
jobs:
stage:
runs-on: antville
environment:
name: stage
url: ${{ vars.stage_url }}
steps:
- uses: actions/checkout@v4
- name: Build with Gradle
run: ./gradlew :build
- name: Publish to staging server
# The rsync command applies the same filters as the one in tools/extras/deploy.sh
run: |
rsync ./build/install/antville/ staging-server:./apps/antville/ \
--archive --compress --delete --verbose \
--filter '+ /claustra' \
--filter '+ /code' \
--filter '+ /compat' \
--filter '+ /db' \
--filter '+ /i18n' \
--filter '+ /lib' \
--filter '- /*'
rsync ./build/install/antville/static/ staging-server:./apps/antville/static/ \
--archive --compress --verbose \
--filter '+ /fonts' \
--filter '+ /formica.html' \
--filter '+ /img' \
--filter '+ /scripts' \
--filter '+ /styles' \
--filter '- /*'
- name: Restart Helma
run: ssh staging-server restart

2
.nvmrc
View file

@ -1 +1 @@
lts
lts/*

View file

@ -51,7 +51,7 @@ dependencies {
implementation 'org.commonmark:commonmark-ext-autolink:0.24.0'
implementation 'org.commonmark:commonmark-ext-gfm-strikethrough:0.24.0'
implementation 'org.commonmark:commonmark-ext-gfm-tables:0.24.0'
implementation 'org.jsoup:jsoup:1.20.1'
implementation 'org.jsoup:jsoup:1.21.1'
implementation 'rome:rome:1.0'
lessCss('org.lesscss:lesscss:1.7.0.1.1') {

View file

@ -46,7 +46,7 @@ Proxy.prototype.main_action = function () {
let content = new java.lang.String(data.content, 'utf-8');
if (!data.type.startsWith('text/')) {
if (data.type && !data.type.startsWith('text/')) {
content = new java.lang.String(content.enbase64());
}
@ -55,10 +55,10 @@ Proxy.prototype.main_action = function () {
} else {
res.contentType = data.type;
if (data.type.startsWith('text/')) {
res.write(java.lang.String(data.content, 'utf-8'));
} else {
if (data.type && !data.type.startsWith('text/')) {
res.writeBinary(data.content);
} else {
res.write(java.lang.String(data.content, 'utf-8'));
}
}
};

543
code/Global/Robots.js Normal file
View file

@ -0,0 +1,543 @@
// Robots parser adapted for Rhino-compatible JavaScript
// Source: <https://github.com/samclarke/robots-parser>
// Copyright (c) 2014 Sam Clarke
// Copyright (c) 2025 Antville.org
// MIT License (MIT)
// Transformation steps:
// 1. Add IIFE around the code
// 2. Replace module.exports with return statement
// 3. Add conditional module.exports for CommonJS support
// 4. Add URL class imitation
var Robots = (() => {
/**
 * Half-baked (read-only) imitation of the URL class of Node.js
 *
 * Wraps java.net.URL (Rhino/Java interop) and exposes the subset of
 * WHATWG URL properties the robots parser needs.
 *
 * @param {string} str - Absolute URL, or a path resolved against `base`
 * @param {string} [base] - Prefix used when `str` has no scheme
 * @returns {Object} Plain object mimicking a Node.js URL instance
 */
function nodeJsUrl(str, base) {
  if (!str.includes('://')) {
    str = (base || 'http://localhost') + str;
  }
  const url = new java.net.URL(str);
  // java.net.URL#getPort() returns -1 when no explicit port is present
  const port = url.port < 0 ? '' : url.port;
  const userInfo = (url.getUserInfo() || "").split(':');
  return {
    hash: url.ref ? '#' + url.ref : '',
    href: url.toString(),
    host: url.host + (port ? ':' + port : port),
    hostname: url.host,
    password: userInfo[1] || "",
    pathname: url.path,
    origin: url.protocol + '://' + url.host + (port ? ':' + port : port),
    port,
    protocol: url.protocol,
    // FIX: was `url.queryy` (typo) — always undefined, so `search` was
    // silently empty and query strings never reached the rule matcher
    search: url.query ? '?' + url.query : '',
    // NOTE(review): searchParams is a stub; callers must not rely on it
    searchParams: {
      get: () => null,
      set: () => null
    },
    username: userInfo[0] || "",
  };
}
// Install the shim only where the platform (e.g. Rhino) lacks a global
// URL class; under Node.js the built-in URL is used unchanged
if (typeof URL === 'undefined') {
  globalThis.URL = nodeJsUrl;
}
/**
 * Strips leading and trailing whitespace from a line, or from every
 * element when given an array of lines.
 *
 * @param {string|Array} line
 * @return {string|Array} Trimmed line(s), or null for a falsy input
 * @private
 */
function trimLine(line) {
  if (!line) return null;
  return Array.isArray(line) ? line.map(trimLine) : String(line).trim();
}
/**
 * Removes an inline robots.txt comment (everything from '#' on).
 *
 * @param {string} line
 * @return {string} Line without the comment part
 * @private
 */
function removeComments(line) {
  const hashIndex = line.indexOf('#');
  return hashIndex > -1 ? line.slice(0, hashIndex) : line;
}
/**
 * Splits a line at the first ':' into [field, value].
 *
 * @param {string} line
 * @return {Array.<string>} Two-element array, or null when the line is
 *     falsy or contains no ':'
 * @private
 */
function splitLine(line) {
  const separatorIndex = String(line).indexOf(':');
  if (!line || separatorIndex < 0) {
    return null;
  }
  return [line.slice(0, separatorIndex), line.slice(separatorIndex + 1)];
}
/**
 * Normalises a user-agent string: lower case, version suffix removed
 * (e.g. "robot/1.0" becomes "robot"), surrounding whitespace trimmed.
 *
 * @param {string} userAgent
 * @return {string}
 * @private
 */
function formatUserAgent(userAgent) {
  let formatted = userAgent.toLowerCase();
  // Strip the version number from robot/1.0 style user agents
  const slashIndex = formatted.indexOf('/');
  if (slashIndex > -1) {
    formatted = formatted.slice(0, slashIndex);
  }
  return formatted.trim();
}
/**
 * Normalises the URL encoding of a path by percent-encoding unicode
 * characters and upper-casing all percent escapes.
 *
 * @param {string} path
 * @return {string} Normalised path, or the input unchanged on failure
 * @private
 */
function normaliseEncoding(path) {
  try {
    // encodeURI re-encodes existing '%' as '%25'; undo that so
    // already-encoded escapes survive a single pass
    const encoded = encodeURI(path).replace(/%25/g, '%');
    return urlEncodeToUpper(encoded);
  } catch (e) {
    // encodeURI throws on malformed input (e.g. lone surrogates)
    return path;
  }
}
/**
 * Upper-cases all percent escapes in a path.
 *
 * e.g.: %2a%ef becomes %2A%EF
 *
 * @param {string} path
 * @return {string}
 * @private
 */
function urlEncodeToUpper(path) {
  return path.replace(/%[0-9a-fA-F]{2}/g, (escape) => escape.toUpperCase());
}
/**
 * Matches a robots.txt pattern against a path.
 *
 * Implements the same algorithm as the Google reference matcher in
 * google/robotstxt so results stay consistent with the spec: it tracks,
 * for the pattern consumed so far, every length of a path prefix that
 * can still match it.
 *
 * @see https://github.com/google/robotstxt/blob/f465f0ede81099dd8bc4aeb2966b3a892bd488b3/robots.cc#L74
 * @param {string} pattern
 * @param {string} path
 * @return {boolean}
 * @private
 */
function matches(pattern, path) {
  // matchingLengths[0 .. numMatchingLengths) holds every matching
  // prefix length; one extra slot covers the zero-length match
  // (pattern may start with '*', which matches zero or more chars)
  const matchingLengths = new Array(path.length + 1);
  let numMatchingLengths = 1;
  matchingLengths[0] = 0;
  for (let p = 0; p < pattern.length; p++) {
    const token = pattern[p];
    if (token === '$' && p + 1 === pattern.length) {
      // Trailing '$' anchors the pattern: the longest current match
      // must consume the entire path
      return matchingLengths[numMatchingLengths - 1] === path.length;
    }
    if (token === '*') {
      // '*' matches zero or more chars: every length from the current
      // smallest match up to the full path length becomes valid
      numMatchingLengths = path.length - matchingLengths[0] + 1;
      for (let i = 1; i < numMatchingLengths; i++) {
        matchingLengths[i] = matchingLengths[i - 1] + 1;
      }
    } else {
      // Literal char: keep only matches whose next path char equals
      // it, extending each kept match by one
      let numMatches = 0;
      for (let i = 0; i < numMatchingLengths; i++) {
        const len = matchingLengths[i];
        if (len < path.length && path[len] === token) {
          matchingLengths[numMatches++] = len + 1;
        }
      }
      if (numMatches === 0) {
        return false;
      }
      numMatchingLengths = numMatches;
    }
  }
  return true;
}
/**
 * Parses robots.txt contents and feeds the directives into the given
 * Robots instance via its addRule/setCrawlDelay/addSitemap/
 * setPreferredHost methods.
 *
 * @param {string} contents - Raw robots.txt text
 * @param {Robots} robots - Instance collecting the parsed directives
 * @private
 */
function parseRobots(contents, robots) {
  const lines = contents
    .split(/\r\n|\r|\n/)
    .map(removeComments)
    .map(splitLine)
    .map(trimLine);
  const currentUserAgents = [];
  // True while no directive followed the last user-agent line; a new
  // user-agent line then starts a fresh group instead of extending it
  let isNoneUserAgentState = true;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    if (!line || !line[0]) {
      continue;
    }
    const directive = line[0].toLowerCase();
    const value = line[1];
    switch (directive) {
      case 'user-agent':
        if (isNoneUserAgentState) {
          currentUserAgents.length = 0;
        }
        if (value) {
          currentUserAgents.push(formatUserAgent(value));
        }
        break;
      case 'disallow':
        // i + 1: rule line numbers are 1-based
        robots.addRule(currentUserAgents, value, false, i + 1);
        break;
      case 'allow':
        robots.addRule(currentUserAgents, value, true, i + 1);
        break;
      case 'crawl-delay':
        robots.setCrawlDelay(currentUserAgents, value);
        break;
      case 'sitemap':
        if (value) {
          robots.addSitemap(value);
        }
        break;
      case 'host':
        if (value) {
          robots.setPreferredHost(value.toLowerCase());
        }
        break;
    }
    isNoneUserAgentState = directive !== 'user-agent';
  }
}
/**
 * Finds the rule that applies to a path, if any.
 *
 * The longest matching pattern takes precedence; when two matching
 * patterns have equal length, an allow rule beats a disallow rule.
 *
 * @param {string} path
 * @param {Array.<Object>} rules
 * @return {Object?} The winning rule, or null when nothing matches
 * @private
 */
function findRule(path, rules) {
  let matchedRule = null;
  for (const rule of rules) {
    if (!matches(rule.pattern, path)) {
      continue;
    }
    if (!matchedRule || rule.pattern.length > matchedRule.pattern.length) {
      matchedRule = rule;
    } else if (
      rule.pattern.length === matchedRule.pattern.length &&
      rule.allow &&
      !matchedRule.allow
    ) {
      matchedRule = rule;
    }
  }
  return matchedRule;
}
/**
 * Converts the provided string into a URL object.
 *
 * Relative paths are resolved against a non-existent dummy host so
 * they can never clash with a real domain.
 *
 * @param {string} url
 * @return {?URL} Parsed URL with an explicit port, or null if invalid
 * @private
 */
function parseUrl(url) {
  try {
    const parsed = new URL(url, 'http://robots-relative.samclarke.com/');
    if (!parsed.port) {
      // Make implicit ports explicit so origins compare consistently
      parsed.port = parsed.protocol === 'https:' ? 443 : 80;
    }
    return parsed;
  } catch (e) {
    return null;
  }
}
/**
 * Robots.txt parser for a single robots.txt file.
 *
 * @param {string} url - URL the robots.txt file was fetched from
 * @param {string} contents - Raw robots.txt text
 * @constructor
 */
function Robots(url, contents) {
  this._url = parseUrl(url) || {};
  // Prototype-less dictionary: user-agent names are untrusted keys
  this._rules = Object.create(null);
  this._sitemaps = [];
  this._preferredHost = null;
  parseRobots(contents || '', this);
}
/**
 * Adds the specified allow/deny rule to the rules
 * for the specified user-agents.
 *
 * @param {Array.<string>} userAgents
 * @param {string} pattern
 * @param {boolean} allow
 * @param {number} [lineNumber] Should use 1-based indexing
 */
Robots.prototype.addRule = function (userAgents, pattern, allow, lineNumber) {
  const rules = this._rules;
  userAgents.forEach((userAgent) => {
    rules[userAgent] = rules[userAgent] || [];
    // An empty pattern still registers the user agent, but adds no rule
    if (!pattern) {
      return;
    }
    rules[userAgent].push({
      pattern: normaliseEncoding(pattern),
      allow: allow,
      lineNumber: lineNumber
    });
  });
};
/**
 * Adds the specified crawl delay to the specified user agents.
 *
 * @param {Array.<string>} userAgents
 * @param {string} delayStr
 */
Robots.prototype.setCrawlDelay = function (userAgents, delayStr) {
  const rules = this._rules;
  const delay = Number(delayStr);
  userAgents.forEach((userAgent) => {
    rules[userAgent] = rules[userAgent] || [];
    // Non-numeric delays register the user agent but set no delay
    if (isNaN(delay)) {
      return;
    }
    rules[userAgent].crawlDelay = delay;
  });
};
/**
 * Adds a sitemap URL.
 *
 * @param {string} url
 */
Robots.prototype.addSitemap = function (url) {
  this._sitemaps.push(url);
};
/**
 * Sets the preferred host name.
 *
 * @param {string} url
 */
Robots.prototype.setPreferredHost = function (url) {
  this._preferredHost = url;
};
/**
 * Looks up the rule matching a URL for a user agent.
 *
 * @param {string} url
 * @param {string} ua
 * @param {boolean} explicit - If true, ignore wildcard (*) rules
 * @return {Object?} Matching rule; undefined when the URL does not
 *     belong to the origin this robots.txt was fetched from
 * @private
 */
Robots.prototype._getRule = function (url, ua, explicit) {
  const parsedUrl = parseUrl(url) || {};
  const userAgent = formatUserAgent(ua || '*');
  // This robots.txt only governs URLs on its own origin
  const sameOrigin =
    parsedUrl.protocol === this._url.protocol &&
    parsedUrl.hostname === this._url.hostname &&
    parsedUrl.port === this._url.port;
  if (!sameOrigin) {
    return;
  }
  let rules = this._rules[userAgent];
  if (!explicit) {
    rules = rules || this._rules['*'];
  }
  const path = urlEncodeToUpper(parsedUrl.pathname + parsedUrl.search);
  return findRule(path, rules || []);
};
/**
 * Returns true if allowed, false if not allowed.
 *
 * Will return undefined if the URL is not valid for
 * this robots.txt file.
 *
 * @param {string} url
 * @param {string?} ua
 * @return {boolean?}
 */
Robots.prototype.isAllowed = function (url, ua) {
  const rule = this._getRule(url, ua, false);
  if (rule === undefined) {
    return;
  }
  // No matching rule (null) means allowed by default
  return !rule || rule.allow;
};
/**
 * Returns the 1-based line number of the directive matching the URL
 * for the given user agent, or -1 when no directive matches. Rules
 * added manually without a line number yield undefined.
 *
 * @param {string} url
 * @param {string?} ua
 * @return {number?}
 */
Robots.prototype.getMatchingLineNumber = function (url, ua) {
  const rule = this._getRule(url, ua, false);
  return rule ? rule.lineNumber : -1;
};
/**
 * Returns the opposite of isAllowed().
 *
 * @param {string} url
 * @param {string?} ua
 * @return {boolean}
 */
Robots.prototype.isDisallowed = function (url, ua) {
  return !this.isAllowed(url, ua);
};
/**
 * Returns true if the URL is explicitly disallowed for the specified
 * user agent (wildcard user-agent rules are discarded).
 *
 * Will return undefined if the URL is not valid for this robots.txt file.
 *
 * @param {string} url
 * @param {string} ua
 * @return {boolean?}
 */
Robots.prototype.isExplicitlyDisallowed = function (url, ua) {
  const rule = this._getRule(url, ua, true);
  if (rule === undefined) {
    return;
  }
  return Boolean(rule) && !rule.allow;
};
/**
 * Returns the crawl delay for a user agent, or undefined when none
 * is set.
 *
 * @param {string} ua
 * @return {number?}
 */
Robots.prototype.getCrawlDelay = function (ua) {
  const userAgent = formatUserAgent(ua || '*');
  const rules = this._rules[userAgent] || this._rules['*'] || {};
  return rules.crawlDelay;
};
/**
 * Returns the preferred host if there is one.
 *
 * @return {string?}
 */
Robots.prototype.getPreferredHost = function () {
  return this._preferredHost;
};
/**
 * Returns a copy of the collected sitemap URLs.
 *
 * @return {Array.<string>}
 */
Robots.prototype.getSitemaps = function () {
  return this._sitemaps.slice(0);
};
return Robots;
})();
// Expose the parser to CommonJS consumers (e.g. unit tests run under
// Node.js); under Rhino the global `Robots` variable is used instead
if (typeof module !== 'undefined' && module.exports) {
  module.exports = Robots;
}

View file

@ -140,10 +140,18 @@ HopObject.prototype.onRequest = function() {
}
}
// Set up layout handler and skin path
HopObject.confirmConstructor(Layout);
res.handlers.layout = res.handlers.site.layout || new Layout;
res.skinpath = res.handlers.layout.getSkinPath();
if (res.handlers.site.enforceRobotsTxt()) {
res.status = 403
res.data.error = gettext('The <a href="{0}">robots.txt</a> file disallows access to this page.', res.handlers.site.href('robots.txt'));
root.error_action();
res.stop();
}
if (!this.getPermission(req.action)) {
if (!session.user) {
User.setLocation(root.href() + req.path);

View file

@ -202,7 +202,23 @@ Members.prototype.login_action = function() {
}
res.message = gettext('Welcome to {0}, {1}. Have fun!', res.handlers.site.getTitle(), user.name);
res.redirect(User.getLocation() || this._parent.href());
const location = User.getLocation() || this._parent.href();
// If the requested host is outside of the cookie domain, redirect and login to the root site, too
if (this._parent !== root && !req.getHeader("Host").includes(app.appsProperties.cookieDomain)) {
const token = java.util.UUID.randomUUID();
const digest = session.user.getDigest(token);
session.user.setMetadata('rootCookieToken', token);
res.redirect(
root.href('cookie')
+ '?digest=' + encodeURIComponent(digest)
+ '&name=' + encodeURIComponent(req.postParams.name)
+ '&location=' + encodeURIComponent(location)
);
}
res.redirect(location);
} catch (ex) {
res.message = ex;
}

View file

@ -136,16 +136,16 @@
(<% param.helmaBuildDate %>)
</dd>
<dt><% gettext "Scripting Engine" %></dt>
<dd><a href="https://github.com/mozilla/rhino"><% param.rhino %></a></dd>
<dd><a href="https://github.com/mozilla/rhino/releases/tag/<% param.rhino | replace \\. _ | replace \\s '' %>_Release"><% param.rhino %></a></dd>
<dt><% gettext "Webserver" %></dt>
<dd><a href="https://github.com/jetty/jetty.project">Jetty <% param.jetty %></a></dd>
<dd><a href="https://github.com/jetty/jetty.project/releases/tag/jetty-<% param.jetty %>">Jetty <% param.jetty %></a></dd>
<dt><% gettext "Servlet Interface" %></dt>
<dd>
<a href="https://docs.oracle.com/javaee/7/api/index.html?javax/servlet/package-summary.html">
Javax <% param.servlet %>
<a href="https://jakarta.ee/specifications/servlet/<% param.servlet %>">
Jakarta <% param.servlet %>
</a>
</dd>
<dt><% gettext "Virtual Machine" %></dt>
<dd><a href="https://openjdk.org">Java <% param.java %></a></dd>
<dd><a href="https://openjdk.org/projects/jdk/<% param.java | replace \\.\\d+\\.\\d+$ '' %>">Java <% param.java %></a></dd>
</dl>
</div>

View file

@ -94,6 +94,7 @@ Root.prototype.getPermission = function(action) {
switch (action) {
case '.':
case 'main':
case 'cookie':
case 'debug':
case 'default.hook':
case 'favicon.ico':
@ -367,6 +368,23 @@ Root.prototype.mrtg_action = function() {
return;
}
// Login to the root site if Members#login_action() redirects here
// This way custom domains are getting the default domain cookie, too
Root.prototype.cookie_action = function() {
  if (req.data.digest && req.data.name) {
    const user = User.getByName(req.data.name);
    if (user) {
      // The token was stored by the login action; a matching digest
      // proves this redirect originated from a successful login
      const token = user.getMetadata("rootCookieToken");
      const digest = user.getDigest(token);
      if (digest === req.data.digest) {
        session.login(user);
        // Remove the one-time token so the link cannot be replayed
        user.deleteMetadata("rootCookieToken");
      }
    }
  }
  // Continue to the originally requested location when provided
  res.redirect(req.data.location || req.data.http_referer || root.href());
};
/**
* Catch some undefined macro handlers, then delegate to the super prototype.
* @param {String} name

View file

@ -143,6 +143,22 @@
</div>
</div>
<div class='uk-form-row'>
<label class='uk-form-label' for='trollFilter'>
<% gettext 'Robot rules' %>
</label>
<div class='uk-form-controls'>
<label>
<% site.checkbox robotsTxtMode %>
<% gettext enforced %>
</label>
<p class="uk-form-help-block">
<% gettext 'Edit the rules in the <a href="{0}Site/robots/edit">robots.txt</a> skin.' <% site.layout.skins.href %> %>
</p>
</div>
</div>
<div class='uk-form-row'>
<label class='uk-form-label' for='trollFilter'>
<% gettext 'Troll Filter' %>

View file

@ -39,6 +39,7 @@ this.handleMetadata('notificationMode');
this.handleMetadata('notified');
this.handleMetadata('pageSize');
this.handleMetadata('pageMode');
this.handleMetadata('robotsTxtMode');
this.handleMetadata('spamfilter');
this.handleMetadata('tagline');
this.handleMetadata('timeZone');
@ -46,7 +47,7 @@ this.handleMetadata('title');
this.handleMetadata('trollFilter');
/**
* Ffunction
* @function
* @returns {String[]}
* @see defineConstants
*/
@ -94,6 +95,13 @@ Site.getNotificationModes = defineConstants(Site, markgettext('Nobody'),
*/
Site.getCallbackModes = defineConstants(Site, markgettext('disabled'),
markgettext('enabled'));
/**
* @function
* @returns {String[]}
* @see defineConstants
*/
Site.getRobotsTxtModes = defineConstants(Site, markgettext('suggest'),
markgettext('enforce'));
/**
* @param {String} name A unique identifier also used in the URL of a site
@ -132,6 +140,7 @@ Site.add = function(data, user) {
configured: now,
created: now,
creator: user,
robotsTxtMode: Site.SUGGEST,
modified: now,
modifier: user,
status: user.status === User.PRIVILEGED ? Site.TRUSTED : user.status,
@ -367,6 +376,8 @@ Site.prototype.getFormOptions = function(name) {
switch (name) {
case 'archiveMode':
return Site.getArchiveModes();
case 'callbackMode':
return Site.getCallbackModes();
case 'commentMode':
return Site.getCommentModes();
case 'locale':
@ -379,12 +390,12 @@ Site.prototype.getFormOptions = function(name) {
return Site.getNotificationModes();
case 'pageMode':
return Site.getPageModes();
case 'robotsTxtMode':
return Site.getRobotsTxtModes();
case 'status':
return Site.getStatus();
case 'timeZone':
return getTimeZones(this.getLocale());
case 'callbackMode':
return Site.getCallbackModes();
default:
return HopObject.prototype.getFormOptions.apply(this, arguments);
}
@ -441,8 +452,9 @@ Site.prototype.update = function(data) {
archiveMode: data.archiveMode || Site.CLOSED,
callbackMode: data.callbackMode || Site.DISABLED,
callbackUrl: data.callbackUrl || this.callbackUrl || String.EMPTY,
imageDimensionLimits: [data.maxImageWidth, data.maxImageHeight],
commentMode: data.commentMode || Site.DISABLED,
robotsTxtMode: data.robotsTxtMode || Site.RELAXED,
imageDimensionLimits: [data.maxImageWidth, data.maxImageHeight],
locale: data.locale || root.getLocale().toString(),
mode: data.mode || Site.CLOSED,
notificationMode: data.notificationMode || Site.NOBODY,
@ -477,7 +489,8 @@ Site.prototype.main_css_action = function() {
res.push();
this.renderSkin('$Site#stylesheet');
this.renderSkin('Site#stylesheet');
var css = res.pop();
var css = res.pop()
.replace(/<(\/?style|!).*/g, ''); // TODO: Actually, a compatibility fix (earlier CSS skins contained the <style> element)
try {
lessParser.parse(css, function(error, less) {
@ -1124,3 +1137,28 @@ Site.prototype.callback = function(ref) {
}
return;
}
/**
 * Decides whether the current request must be blocked according to the
 * site's robots.txt, when the site's robotsTxtMode is set to ENFORCE.
 *
 * @returns {Boolean} True if access to the requested URL is denied
 */
Site.prototype.enforceRobotsTxt = function() {
  if (this.robotsTxtMode !== Site.ENFORCE) {
    return false;
  }
  // Override some URLs to prevent a site from becoming inaccessible even for the owner
  const overrides = [
    this.href('edit'),
    this.href('main.css'),
    this.href('main.js'),
    this.href('robots.txt'),
    this.layout.href(),
    this.members.href()
  ];
  const robotsTxt = root.renderSkinAsString('Site#robots');
  const robots = new Robots(this.href('robots.txt'), robotsTxt);
  const href = path.href(req.action);
  // FIX: slice(1) was applied to absolute hrefs, too, mangling them
  // (e.g. "http://…" became "ttp://…"); only strip the leading slash
  // when resolving a relative href against the site URL
  const fullUrl = href.includes('://') ? href : this.href() + href.slice(1);
  // NOTE(review): isAllowed() returns undefined for URLs outside the
  // robots.txt origin, which blocks them here — confirm that is intended
  return !overrides.some(href => fullUrl.includes(href))
    && !robots.isAllowed(fullUrl, req.getHeader('user-agent'));
}

View file

@ -223,6 +223,9 @@ Skin.prototype.compare_action = function() {
res.push();
var param = {}, leftLineNumber = rightLineNumber = 0;
for (let line of diff) {
if (!line) {
continue;
}
if (line.deleted) {
param.right = encode(line.value);
param.leftStatus = 'added';
@ -249,7 +252,7 @@ Skin.prototype.compare_action = function() {
this.renderSkin('$Skin#difference', param);
}
}
if (line.value !== null) {
if (line.value !== null && typeof line.value !== 'undefined') {
leftLineNumber += 1;
rightLineNumber += 1;
param.leftLineNumber = leftLineNumber;

View file

@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
antville.url = jdbc:h2:./db/antville;ifexists=true;mode=postgresql
antville.url = jdbc:h2:./db/antville;ifexists=true;mode=postgresql;non_keywords=value
antville.driver = org.h2.Driver
antville.user = antville
antville.password = antville

Binary file not shown.

View file

@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.1-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME

4
gradlew vendored
View file

@ -114,7 +114,7 @@ case "$( uname )" in #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
CLASSPATH="\\\"\\\""
# Determine the Java command to use to start the JVM.
@ -213,7 +213,7 @@ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
-jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \
"$@"
# Stop when "xargs" is not available.

4
gradlew.bat vendored
View file

@ -70,11 +70,11 @@ goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
set CLASSPATH=
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %*
:end
@rem End local scope for the variables with windows NT shell

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -125,6 +125,7 @@ global.messages['de'] = {
"Edit Poll": "Umfrage bearbeiten",
"Edit Story": "Beitrag bearbeiten",
"Edit the filter in the site settings.": "Der Filter kann in den Einstellungen bearbeitet werden.",
"Edit the rules in the <a href=\"{0}Site/robots/edit\">robots.txt</a> skin.": "Bearbeiten Sie die Regeln im <a href=\"{0}Site/robots/edit\">robots.txt</a>-Skin.",
"Edit {0}.{1}": "{0}.{1} bearbeiten",
"Enabled": "Aktiviert",
"Enter one filter {0}pattern{1} per line to be applied on every URL in the referrer and backlink lists.": "Geben Sie ein {0}Filter-Schema{1} pro Zeile ein, das für jede Adresse in den Rückverweis-Listen angewendet werden soll.",
@ -290,6 +291,7 @@ global.messages['de'] = {
"Resource type (e.g. Story or Comment)": "Art der Ressource (z.B. Beitrag oder Kommentar)",
"Restricted": "Eingeschränkt",
"Results": "Ergebnis",
"Robot rules": "Regeln für Robots",
"Role": "Rolle",
"Running": "Laufende",
"Running Polls": "Laufende Umfragen",
@ -359,6 +361,7 @@ global.messages['de'] = {
"Terms and Conditions": "Nutzungsbedingungen",
"Text": "Text",
"Thanks, your vote was registered. You can change your mind until the poll is closed.": "Danke, Ihre Stimme wurde gezählt. Bis die Umfrage beendet ist, können Sie Ihre Meinung jederzeit ändern.",
"The <a href=\"{0}\">robots.txt</a> file disallows access to this page.": "Die <a href=\"{0}\">robots.txt</a>-Datei verbietet den Zugriff auf diese Seite.",
"The Management": "Die Direktion",
"The URL endpoint for each of these APIs is located at": "Die Internet-Adresse für jede dieser Schnittstellen lautet",
"The account data will be available for download from here within the next days.": "Die Kontodaten stehen demnächst hier zum Download bereit.",
@ -529,6 +532,8 @@ global.messages['de'] = {
"e-mail": "E-Mail",
"e.g. {0}": "z.B. {0}",
"enabled": "aktiviert",
"enforce": "erzwingen",
"enforced": "erzwingen",
"export": "Exportieren",
"featured": "sichtbar",
"file": "Datei",
@ -578,6 +583,7 @@ global.messages['de'] = {
"soon": "in Kürze",
"stories": "Beiträge",
"story": "Beitrag",
"suggest": "vorschlagen",
"tag": "Stichwort",
"tags": "Stichworte",
"tomorrow": "morgen",

164
package-lock.json generated
View file

@ -28,24 +28,24 @@
}
},
"node_modules/@babel/code-frame": {
"version": "7.26.2",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz",
"integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==",
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz",
"integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-validator-identifier": "^7.25.9",
"@babel/helper-validator-identifier": "^7.27.1",
"js-tokens": "^4.0.0",
"picocolors": "^1.0.0"
"picocolors": "^1.1.1"
},
"engines": {
"node": ">=6.9.0"
}
},
"node_modules/@babel/helper-string-parser": {
"version": "7.25.9",
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz",
"integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==",
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz",
"integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==",
"dev": true,
"license": "MIT",
"engines": {
@ -53,9 +53,9 @@
}
},
"node_modules/@babel/helper-validator-identifier": {
"version": "7.25.9",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz",
"integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==",
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz",
"integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==",
"dev": true,
"license": "MIT",
"engines": {
@ -63,13 +63,13 @@
}
},
"node_modules/@babel/parser": {
"version": "7.27.0",
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.0.tgz",
"integrity": "sha512-iaepho73/2Pz7w2eMS0Q5f83+0RKI7i4xmiYeBmDzfRVbQtTOG7Ts0S4HzJVsTMGI9keU8rNfuZr8DKfSt7Yyg==",
"version": "7.27.2",
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.27.2.tgz",
"integrity": "sha512-QYLs8299NA7WM/bZAdp+CviYYkVoYXlDW2rzliy3chxd1PQjej7JORuMJDJXJUb9g0TT+B99EwaVLKmX+sPXWw==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/types": "^7.27.0"
"@babel/types": "^7.27.1"
},
"bin": {
"parser": "bin/babel-parser.js"
@ -79,14 +79,14 @@
}
},
"node_modules/@babel/types": {
"version": "7.27.0",
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.0.tgz",
"integrity": "sha512-H45s8fVLYjbhFH62dIJ3WtmJ6RSPt/3DRO0ZcT2SUiYiQyz3BLVb9ADEnLl91m74aQPS3AzzeajZHYOalWe3bg==",
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/types/-/types-7.27.1.tgz",
"integrity": "sha512-+EzkxvLNfiUeKMgy/3luqfsCWFRXLb7U6wNQTk60tovuckwB15B191tJWvpp4HjiQWdJkCxO3Wbvc6jlk3Xb2Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-string-parser": "^7.25.9",
"@babel/helper-validator-identifier": "^7.25.9"
"@babel/helper-string-parser": "^7.27.1",
"@babel/helper-validator-identifier": "^7.27.1"
},
"engines": {
"node": ">=6.9.0"
@ -214,9 +214,9 @@
}
},
"node_modules/@npmcli/arborist": {
"version": "9.0.2",
"resolved": "https://registry.npmjs.org/@npmcli/arborist/-/arborist-9.0.2.tgz",
"integrity": "sha512-9z5FgIYd62LxcuCF2BAXnsEo059pGoPv/1E3XkrKBlB9kOQnJ6WSsyOjuGIcZfLAXseamyAif2J7yAVkWNdWzA==",
"version": "9.1.1",
"resolved": "https://registry.npmjs.org/@npmcli/arborist/-/arborist-9.1.1.tgz",
"integrity": "sha512-dtANj0Y757hrIDBfylk6neUzMi2yOX0+jK/YjwKrjSMOzis/o8APRfo6VCKL9hhodAeBW72xD65aN9gPzwQz8Q==",
"dev": true,
"license": "ISC",
"dependencies": {
@ -387,9 +387,9 @@
}
},
"node_modules/@npmcli/package-json": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/@npmcli/package-json/-/package-json-6.1.1.tgz",
"integrity": "sha512-d5qimadRAUCO4A/Txw71VM7UrRZzV+NPclxz/dc+M6B2oYwjWTjqh8HA/sGQgs9VZuJ6I/P7XIAlJvgrl27ZOw==",
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/@npmcli/package-json/-/package-json-6.2.0.tgz",
"integrity": "sha512-rCNLSB/JzNvot0SEyXqWZ7tX2B5dD2a1br2Dp0vSYVo5jh8Z0EZ7lS9TsZ1UtziddB1UfNUaMCc538/HztnJGA==",
"dev": true,
"license": "ISC",
"dependencies": {
@ -453,9 +453,9 @@
}
},
"node_modules/@npmcli/redact": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/@npmcli/redact/-/redact-3.2.0.tgz",
"integrity": "sha512-NyJXHoZwJE0iUsCDTclXf1bWHJTsshtnp5xUN6F2vY+OLJv6d2cNc4Do6fKNkmPToB0GzoffxRh405ibTwG+Og==",
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/@npmcli/redact/-/redact-3.2.2.tgz",
"integrity": "sha512-7VmYAmk4csGv08QzrDKScdzn11jHPFGyqJW39FyPgPuAp3zIaUmuCo1yxw9aGs+NEJuTGQ9Gwqpt93vtJubucg==",
"dev": true,
"license": "ISC",
"engines": {
@ -515,9 +515,9 @@
}
},
"node_modules/@sigstore/protobuf-specs": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/@sigstore/protobuf-specs/-/protobuf-specs-0.4.1.tgz",
"integrity": "sha512-7MJXQhIm7dWF9zo7rRtMYh8d2gSnc3+JddeQOTIg6gUN7FjcuckZ9EwGq+ReeQtbbl3Tbf5YqRrWxA1DMfIn+w==",
"version": "0.4.2",
"resolved": "https://registry.npmjs.org/@sigstore/protobuf-specs/-/protobuf-specs-0.4.2.tgz",
"integrity": "sha512-F2ye+n1INNhqT0MW+LfUEvTUPc/nS70vICJcxorKl7/gV9CO39+EDCw+qHNKEqvsDWk++yGVKCbzK1qLPvmC8g==",
"dev": true,
"license": "Apache-2.0",
"engines": {
@ -543,13 +543,13 @@
}
},
"node_modules/@sigstore/tuf": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/@sigstore/tuf/-/tuf-3.1.0.tgz",
"integrity": "sha512-suVMQEA+sKdOz5hwP9qNcEjX6B45R+hFFr4LAWzbRc5O+U2IInwvay/bpG5a4s+qR35P/JK/PiKiRGjfuLy1IA==",
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/@sigstore/tuf/-/tuf-3.1.1.tgz",
"integrity": "sha512-eFFvlcBIoGwVkkwmTi/vEQFSva3xs5Ot3WmBcjgjVdiaoelBLQaQ/ZBfhlG0MnG0cmTYScPpk7eDdGDWUcFUmg==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@sigstore/protobuf-specs": "^0.4.0",
"@sigstore/protobuf-specs": "^0.4.1",
"tuf-js": "^3.0.1"
},
"engines": {
@ -557,15 +557,15 @@
}
},
"node_modules/@sigstore/verify": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@sigstore/verify/-/verify-2.1.0.tgz",
"integrity": "sha512-kAAM06ca4CzhvjIZdONAL9+MLppW3K48wOFy1TbuaWFW/OMfl8JuTgW0Bm02JB1WJGT/ET2eqav0KTEKmxqkIA==",
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/@sigstore/verify/-/verify-2.1.1.tgz",
"integrity": "sha512-hVJD77oT67aowHxwT4+M6PGOp+E2LtLdTK3+FC0lBO9T7sYwItDMXZ7Z07IDCvR1M717a4axbIWckrW67KMP/w==",
"dev": true,
"license": "Apache-2.0",
"dependencies": {
"@sigstore/bundle": "^3.1.0",
"@sigstore/core": "^2.0.0",
"@sigstore/protobuf-specs": "^0.4.0"
"@sigstore/protobuf-specs": "^0.4.1"
},
"engines": {
"node": "^18.17.0 || >=20.5.0"
@ -773,9 +773,9 @@
}
},
"node_modules/asn1.js/node_modules/bn.js": {
"version": "4.12.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
"version": "4.12.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
"dev": true,
"license": "MIT"
},
@ -941,9 +941,9 @@
"license": "MIT"
},
"node_modules/bn.js": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.1.tgz",
"integrity": "sha512-eXRvHzWyYPBuB4NBy0cmYQjGitUrtqwbvlzP3G6VFnNRbsZQIxQ10PbKKHt8gZ/HW/D/747aDl+QkDqg3KQLMQ==",
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-5.2.2.tgz",
"integrity": "sha512-v2YAxEmKaBLahNwE1mjp4WON6huMNeuDvagFZW+ASCuA/ku0bXR9hSMw0XpiqMoA3+rmnyck/tPRSFQkoC9Cuw==",
"dev": true,
"license": "MIT"
},
@ -1621,9 +1621,9 @@
}
},
"node_modules/create-ecdh/node_modules/bn.js": {
"version": "4.12.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
"version": "4.12.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
"dev": true,
"license": "MIT"
},
@ -1742,9 +1742,9 @@
"license": "Apache-2.0"
},
"node_modules/debug": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.0.tgz",
"integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==",
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz",
"integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==",
"dev": true,
"license": "MIT",
"dependencies": {
@ -1876,9 +1876,9 @@
}
},
"node_modules/diffie-hellman/node_modules/bn.js": {
"version": "4.12.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
"version": "4.12.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
"dev": true,
"license": "MIT"
},
@ -1942,9 +1942,9 @@
}
},
"node_modules/elliptic/node_modules/bn.js": {
"version": "4.12.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
"version": "4.12.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
"dev": true,
"license": "MIT"
},
@ -2240,9 +2240,9 @@
}
},
"node_modules/generate-license-file/node_modules/glob": {
"version": "11.0.1",
"resolved": "https://registry.npmjs.org/glob/-/glob-11.0.1.tgz",
"integrity": "sha512-zrQDm8XPnYEKawJScsnM0QzobJxlT/kHOOlRTio8IH/GrmxRE5fjllkzdaHclIuNjUQTJYH2xHNIGfdpJkDJUw==",
"version": "11.0.2",
"resolved": "https://registry.npmjs.org/glob/-/glob-11.0.2.tgz",
"integrity": "sha512-YT7U7Vye+t5fZ/QMkBFrTJ7ZQxInIUjwyAjVj84CYXqgBdv30MFUPGnBR6sQaVq6Is15wYJUsnzTuWaGRBhBAQ==",
"dev": true,
"license": "ISC",
"dependencies": {
@ -2264,9 +2264,9 @@
}
},
"node_modules/generate-license-file/node_modules/jackspeak": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.1.0.tgz",
"integrity": "sha512-9DDdhb5j6cpeitCbvLO7n7J4IxnbM6hoF6O1g4HQ5TfhvvKN8ywDM7668ZhMHRqVmxqhps/F6syWK2KcPxYlkw==",
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-4.1.1.tgz",
"integrity": "sha512-zptv57P3GpL+O0I7VdMJNBZCu+BPHVQUk55Ft8/QCJjTVxrnJHuVuX/0Bl2A6/+2oyR/ZMEuFKwmzqqZ/U5nPQ==",
"dev": true,
"license": "BlueOak-1.0.0",
"dependencies": {
@ -2573,9 +2573,9 @@
}
},
"node_modules/http-cache-semantics": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz",
"integrity": "sha512-er295DKPVsV82j5kw1Gjt+ADA/XYHsajl82cGNQG2eyoPkvgUhX+nDIyelzhIWbbsXP39EHcI6l5tYs2FYqYXQ==",
"version": "4.2.0",
"resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.2.0.tgz",
"integrity": "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==",
"dev": true,
"license": "BSD-2-Clause"
},
@ -3453,9 +3453,9 @@
}
},
"node_modules/miller-rabin/node_modules/bn.js": {
"version": "4.12.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
"version": "4.12.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
"dev": true,
"license": "MIT"
},
@ -4555,9 +4555,9 @@
}
},
"node_modules/public-encrypt/node_modules/bn.js": {
"version": "4.12.1",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.1.tgz",
"integrity": "sha512-k8TVBiPkPJT9uHLdOKfFpqcfprwBFOAAXXozRubr7R7PfIuKvQlzcI4M0pALeqXN09vdaMbUdUj+pass+uULAg==",
"version": "4.12.2",
"resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.12.2.tgz",
"integrity": "sha512-n4DSx829VRTRByMRGdjQ9iqsN0Bh4OolPsFnaZBLcbi8iXcB+kJ9s7EnRt4wILZNV3kPLHkRVfOc/HvhC3ovDw==",
"dev": true,
"license": "MIT"
},
@ -4836,9 +4836,9 @@
"optional": true
},
"node_modules/semver": {
"version": "7.7.1",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.1.tgz",
"integrity": "sha512-hlq8tAfn0m/61p4BVRcPzIGr6LKiMwo4VM6dGi6pt4qcRkmNzTcWq6eCEjEh+qXjkMDvPlOFFSGwQjoEa6gyMA==",
"version": "7.7.2",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
"integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==",
"dev": true,
"license": "ISC",
"bin": {
@ -5518,9 +5518,9 @@
}
},
"node_modules/tinyglobby": {
"version": "0.2.13",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.13.tgz",
"integrity": "sha512-mEwzpUgrLySlveBwEVDMKk5B57bhLPYovRfPAXD5gA/98Opn0rCDj3GtLwFvCvH5RK9uPCExUROW5NjDwvqkxw==",
"version": "0.2.14",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
"dev": true,
"license": "MIT",
"dependencies": {
@ -6031,9 +6031,9 @@
}
},
"node_modules/zod": {
"version": "3.24.3",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.3.tgz",
"integrity": "sha512-HhY1oqzWCQWuUqvBFnsyrtZRhyPeR7SUGv+C4+MsisMuVfSPx8HpwWqH8tRahSlt6M3PiFAcoeFhZAqIXTxoSg==",
"version": "3.25.28",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.28.tgz",
"integrity": "sha512-/nt/67WYKnr5by3YS7LroZJbtcCBurDKKPBPWWzaxvVCGuG/NOsiKkrjoOhI8mJ+SQUXEbUzeB3S+6XDUEEj7Q==",
"dev": true,
"license": "MIT",
"funding": {

912
tests/robots.js Normal file
View file

@ -0,0 +1,912 @@
// Unit tests of the robots parser
// Source: <https://github.com/samclarke/robots-parser/blob/master/test/Robots.js>
// Copyright (c) 2014 Sam Clarke
// MIT License (MIT)
// Run with `npx nyc --reporter=text-summary --reporter=html --reporter=lcovonly mocha tests/robots.js`
// Set up the test environment with Antville's version of the robots parser
// Antville's adaptation of the robots parser under test.
const Robots = require('../code/Global/Robots.js');
// Factory mirroring the upstream robots-parser API so the vendored tests run unchanged.
const robotsParser = (url, contents) => new Robots(url, contents);
// Chai's BDD-style assertion interface.
const { expect } = require('chai');
/**
 * Assert that a robots.txt ruleset yields the expected verdicts.
 *
 * @param {string} url - URL of the robots.txt file the rules belong to.
 * @param {string} contents - Raw robots.txt contents to parse.
 * @param {string[]} allowed - URLs that must be reported as allowed.
 * @param {string[]} disallowed - URLs that must be reported as disallowed.
 */
function testRobots(url, contents, allowed, disallowed) {
  const robots = robotsParser(url, contents);
  // Distinct callback parameter names avoid shadowing the outer `url` parameter.
  allowed.forEach(function (allowedUrl) {
    expect(robots.isAllowed(allowedUrl)).to.equal(true);
  });
  disallowed.forEach(function (disallowedUrl) {
    expect(robots.isDisallowed(disallowedUrl)).to.equal(true);
  });
}
describe('Robots', function () {
it('should parse the disallow directive', function () {
var contents = [
'User-agent: *',
'Disallow: /fish/',
'Disallow: /test.html'
].join('\n');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should parse the allow directive', function () {
var contents = [
'User-agent: *',
'Disallow: /fish/',
'Disallow: /test.html',
'Allow: /fish/test.html',
'Allow: /test.html'
].join('\n');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/fish/test.html',
'http://www.example.com/Test.html',
'http://www.example.com/test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should parse patterns', function () {
var contents = [
'User-agent: *',
'Disallow: /fish*.php',
'Disallow: /*.dext$',
'Disallow: /dir*'
].join('\n');
var allowed = [
'http://www.example.com/Fish.PHP',
'http://www.example.com/Fish.dext1',
'http://www.example.com/folder/dir.html',
'http://www.example.com/folder/dir/test.html'
];
var disallowed = [
'http://www.example.com/fish.php',
'http://www.example.com/fishheads/catfish.php?parameters',
'http://www.example.com/AnYthInG.dext',
'http://www.example.com/Fish.dext.dext',
'http://www.example.com/dir/test.html',
'http://www.example.com/directory.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should have the correct order precedence for allow and disallow', function () {
var contents = [
'User-agent: *',
'Disallow: /fish*.php',
'Allow: /fish/index.php',
'Disallow: /test',
'Allow: /test/',
'Disallow: /aa/',
'Allow: /aa/',
'Allow: /bb/',
'Disallow: /bb/',
].join('\n');
var allowed = [
'http://www.example.com/test/index.html',
'http://www.example.com/fish/index.php',
'http://www.example.com/test/',
'http://www.example.com/aa/',
'http://www.example.com/bb/',
'http://www.example.com/x/'
];
var disallowed = [
'http://www.example.com/fish.php',
'http://www.example.com/fishheads/catfish.php?parameters',
'http://www.example.com/test'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should have the correct order precedence for wildcards', function () {
var contents = [
'User-agent: *',
'Disallow: /*/',
'Allow: /x/',
].join('\n');
var allowed = [
'http://www.example.com/x/',
'http://www.example.com/fish.php',
'http://www.example.com/test'
];
var disallowed = [
'http://www.example.com/a/',
'http://www.example.com/xx/',
'http://www.example.com/test/index.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should parse lines delimitated by \\r', function () {
var contents = [
'User-agent: *',
'Disallow: /fish/',
'Disallow: /test.html'
].join('\r');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should parse lines delimitated by \\r\\n', function () {
var contents = [
'User-agent: *',
'Disallow: /fish/',
'Disallow: /test.html'
].join('\r\n');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should parse lines delimitated by mixed line endings', function () {
var contents = [
'User-agent: *\r',
'Disallow: /fish/\r\n',
'Disallow: /test.html\n\n'
].join('');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should ignore rules that are not in a group', function () {
var contents = [
'Disallow: /secret.html',
'Disallow: /test',
].join('\n');
var allowed = [
'http://www.example.com/secret.html',
'http://www.example.com/test/index.html',
'http://www.example.com/test/'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, []);
});
it('should ignore comments', function () {
var contents = [
'#',
'# This is a comment',
'#',
'User-agent: *',
'# This is a comment',
'Disallow: /fish/ # ignore',
'# Disallow: fish',
'Disallow: /test.html'
].join('\n');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should ignore invalid lines', function () {
var contents = [
'invalid line',
'User-agent: *',
'Disallow: /fish/',
':::::another invalid line:::::',
'Disallow: /test.html',
'Unknown: tule'
].join('\n');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html'
];
var disallowed = [
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should ignore empty user-agent lines', function () {
var contents = [
'User-agent:',
'Disallow: /fish/',
'Disallow: /test.html'
].join('\n');
var allowed = [
'http://www.example.com/fish',
'http://www.example.com/Test.html',
'http://www.example.com/fish/index.php',
'http://www.example.com/fish/',
'http://www.example.com/test.html'
];
var disallowed = [];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should support groups with multiple user agents (case insensitive)', function () {
var contents = [
'User-agent: agenta',
'User-agent: agentb',
'Disallow: /fish',
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.isAllowed("http://www.example.com/fish", "agenta")).to.equal(false);
});
it('should return undefined for invalid urls', function () {
var contents = [
'User-agent: *',
'Disallow: /secret.html',
'Disallow: /test',
].join('\n');
var invalidUrls = [
'http://example.com/secret.html',
'http://ex ample.com/secret.html',
'http://www.example.net/test/index.html',
'http://www.examsple.com/test/',
'example.com/test/',
':::::;;`\\|/.example.com/test/'
];
var robots = robotsParser('http://www.example.com/robots.txt', contents);
invalidUrls.forEach(function (url) {
expect(robots.isAllowed(url)).to.equal(undefined);
});
});
it('should handle Unicode, urlencoded and punycode URLs', function () {
var contents = [
'User-agent: *',
'Disallow: /secret.html',
'Disallow: /test',
].join('\n');
var allowed = [
'http://www.münich.com/index.html',
'http://www.xn--mnich-kva.com/index.html',
'http://www.m%C3%BCnich.com/index.html'
];
var disallowed = [
'http://www.münich.com/secret.html',
'http://www.xn--mnich-kva.com/secret.html',
'http://www.m%C3%BCnich.com/secret.html'
];
testRobots('http://www.münich.com/robots.txt', contents, allowed, disallowed);
testRobots('http://www.xn--mnich-kva.com/robots.txt', contents, allowed, disallowed);
testRobots('http://www.m%C3%BCnich.com/robots.txt', contents, allowed, disallowed);
});
it('should handle Unicode and urlencoded paths', function () {
var contents = [
'User-agent: *',
'Disallow: /%CF%80',
'Disallow: /%e2%9d%83',
'Disallow: /%a%a',
'Disallow: /💩',
'Disallow: /✼*t$',
'Disallow: /%E2%9C%A4*t$',
'Disallow: /✿%a',
'Disallow: /http%3A%2F%2Fexample.org'
].join('\n');
var allowed = [
'http://www.example.com/✼testing',
'http://www.example.com/%E2%9C%BCtesting',
'http://www.example.com/✤testing',
'http://www.example.com/%E2%9C%A4testing',
'http://www.example.com/http://example.org',
'http://www.example.com/http:%2F%2Fexample.org'
];
var disallowed = [
'http://www.example.com/%CF%80',
'http://www.example.com/%CF%80/index.html',
'http://www.example.com/π',
'http://www.example.com/π/index.html',
'http://www.example.com/%e2%9d%83',
'http://www.example.com/%E2%9D%83/index.html',
'http://www.example.com/❃',
'http://www.example.com/❃/index.html',
'http://www.example.com/%F0%9F%92%A9',
'http://www.example.com/%F0%9F%92%A9/index.html',
'http://www.example.com/💩',
'http://www.example.com/💩/index.html',
'http://www.example.com/%a%a',
'http://www.example.com/%a%a/index.html',
'http://www.example.com/✼test',
'http://www.example.com/%E2%9C%BCtest',
'http://www.example.com/✤test',
'http://www.example.com/%E2%9C%A4testt',
'http://www.example.com/✿%a',
'http://www.example.com/%E2%9C%BF%atest',
'http://www.example.com/http%3A%2F%2Fexample.org'
];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should handle lone high / low surrogates', function () {
var contents = [
'User-agent: *',
'Disallow: /\uD800',
'Disallow: /\uDC00'
].join('\n');
// These are invalid so can't be disallowed
var allowed = [
'http://www.example.com/\uDC00',
'http://www.example.com/\uD800'
];
var disallowed = [];
testRobots('http://www.example.com/robots.txt', contents, allowed, disallowed);
});
it('should ignore host case', function () {
var contents = [
'User-agent: *',
'Disallow: /secret.html',
'Disallow: /test',
].join('\n');
var allowed = [
'http://www.example.com/index.html',
'http://www.ExAmPlE.com/index.html',
'http://www.EXAMPLE.com/index.html'
];
var disallowed = [
'http://www.example.com/secret.html',
'http://www.ExAmPlE.com/secret.html',
'http://www.EXAMPLE.com/secret.html'
];
testRobots('http://www.eXample.com/robots.txt', contents, allowed, disallowed);
});
it('should handle relative paths', function () {
var contents = [
'User-agent: *',
'Disallow: /fish',
'Allow: /fish/test',
].join('\n');
var robots = robotsParser('/robots.txt', contents);
expect(robots.isAllowed('/fish/test')).to.equal(true);
expect(robots.isAllowed('/fish')).to.equal(false);
});
it('should not allow relative paths if domain specified', function () {
var contents = [
'User-agent: *',
'Disallow: /fish',
'Allow: /fish/test',
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.isAllowed('/fish/test')).to.equal(undefined);
expect(robots.isAllowed('/fish')).to.equal(undefined);
});
it('should not treat invalid robots.txt URLs as relative', function () {
var contents = [
'User-agent: *',
'Disallow: /fish',
'Allow: /fish/test',
].join('\n');
var robots = robotsParser('https://ex ample.com/robots.txt', contents);
expect(robots.isAllowed('/fish/test')).to.equal(undefined);
expect(robots.isAllowed('/fish')).to.equal(undefined);
});
it('should not allow URls if domain specified and robots.txt is relative', function () {
var contents = [
'User-agent: *',
'Disallow: /fish',
'Allow: /fish/test',
].join('\n');
var robots = robotsParser('/robots.txt', contents);
expect(robots.isAllowed('http://www.example.com/fish/test')).to.equal(undefined);
expect(robots.isAllowed('http://www.example.com/fish')).to.equal(undefined);
});
it('should allow all if empty robots.txt', function () {
var allowed = [
'http://www.example.com/secret.html',
'http://www.example.com/test/index.html',
'http://www.example.com/test/'
];
var robots = robotsParser('http://www.example.com/robots.txt', '');
allowed.forEach(function (url) {
expect(robots.isAllowed(url)).to.equal(true);
});
});
it('should treat null as allowing all', function () {
var robots = robotsParser('http://www.example.com/robots.txt', null);
expect(robots.isAllowed("http://www.example.com/", "userAgent")).to.equal(true);
expect(robots.isAllowed("http://www.example.com/")).to.equal(true);
});
it('should handle invalid robots.txt urls', function () {
var contents = [
'user-agent: *',
'disallow: /',
'host: www.example.com',
'sitemap: /sitemap.xml'
].join('\n');
var sitemapUrls = [
undefined,
null,
'null',
':/wom/test/'
];
sitemapUrls.forEach(function (url) {
var robots = robotsParser(url, contents);
expect(robots.isAllowed('http://www.example.com/index.html')).to.equal(undefined);
expect(robots.getPreferredHost()).to.equal('www.example.com');
expect(robots.getSitemaps()).to.eql(['/sitemap.xml']);
});
});
it('should parse the crawl-delay directive', function () {
var contents = [
'user-agent: a',
'crawl-delay: 1',
'user-agent: b',
'disallow: /d',
'user-agent: c',
'user-agent: d',
'crawl-delay: 10'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getCrawlDelay('a')).to.equal(1);
expect(robots.getCrawlDelay('b')).to.equal(undefined);
expect(robots.getCrawlDelay('c')).to.equal(10);
expect(robots.getCrawlDelay('d')).to.equal(10);
expect(robots.getCrawlDelay()).to.equal(undefined);
});
it('should ignore invalid crawl-delay directives', function () {
var contents = [
'user-agent: a',
'crawl-delay: 1.2.1',
'user-agent: b',
'crawl-delay: 1.a0',
'user-agent: c',
'user-agent: d',
'crawl-delay: 10a'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getCrawlDelay('a')).to.equal(undefined);
expect(robots.getCrawlDelay('b')).to.equal(undefined);
expect(robots.getCrawlDelay('c')).to.equal(undefined);
expect(robots.getCrawlDelay('d')).to.equal(undefined);
});
it('should parse the sitemap directive', function () {
var contents = [
'user-agent: a',
'crawl-delay: 1',
'sitemap: http://example.com/test.xml',
'user-agent: b',
'disallow: /d',
'sitemap: /sitemap.xml',
'sitemap: http://example.com/test/sitemap.xml '
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getSitemaps()).to.eql([
'http://example.com/test.xml',
'/sitemap.xml',
'http://example.com/test/sitemap.xml'
]);
});
it('should parse the host directive', function () {
var contents = [
'user-agent: a',
'crawl-delay: 1',
'host: www.example.net',
'user-agent: b',
'disallow: /d',
'host: example.com'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getPreferredHost()).to.equal('example.com');
});
it('should parse empty and invalid directives', function () {
var contents = [
'user-agent:',
'user-agent:::: a::',
'crawl-delay:',
'crawl-delay:::: 0:',
'host:',
'host:: example.com',
'sitemap:',
'sitemap:: site:map.xml',
'disallow:',
'disallow::: /:',
'allow:',
'allow::: /:',
].join('\n');
robotsParser('http://www.example.com/robots.txt', contents);
});
it('should treat only the last host directive as valid', function () {
var contents = [
'user-agent: a',
'crawl-delay: 1',
'host: www.example.net',
'user-agent: b',
'disallow: /d',
'host: example.net',
'host: example.com'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getPreferredHost()).to.equal('example.com');
});
it('should return null when there is no host directive', function () {
var contents = [
'user-agent: a',
'crawl-delay: 1',
'user-agent: b',
'disallow: /d',
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getPreferredHost()).to.equal(null);
});
it('should fallback to * when a UA has no rules of its own', function () {
var contents = [
'user-agent: *',
'crawl-delay: 1',
'user-agent: b',
'crawl-delay: 12',
'user-agent: c',
'user-agent: d',
'crawl-delay: 10'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getCrawlDelay('should-fall-back')).to.equal(1);
expect(robots.getCrawlDelay('d')).to.equal(10);
expect(robots.getCrawlDelay('dd')).to.equal(1);
});
it('should not fallback to * when a UA has rules', function () {
var contents = [
'user-agent: *',
'crawl-delay: 1',
'user-agent: b',
'disallow:'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getCrawlDelay('b')).to.equal(undefined);
});
it('should handle UAs with object property names', function () {
var contents = [
'User-agent: *',
'Disallow: /fish',
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.isAllowed('http://www.example.com/fish', 'constructor')).to.equal(false);
expect(robots.isAllowed('http://www.example.com/fish', '__proto__')).to.equal(false);
});
it('should ignore version numbers in the UA string', function () {
var contents = [
'user-agent: *',
'crawl-delay: 1',
'user-agent: b',
'crawl-delay: 12',
'user-agent: c',
'user-agent: d',
'crawl-delay: 10'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getCrawlDelay('should-fall-back/1.0.0')).to.equal(1);
expect(robots.getCrawlDelay('d/12')).to.equal(10);
expect(robots.getCrawlDelay('dd / 0-32-3')).to.equal(1);
expect(robots.getCrawlDelay('b / 1.0')).to.equal(12);
});
it('should return the line number of the matching directive', function () {
var contents = [
'',
'User-agent: *',
'',
'Disallow: /fish/',
'Disallow: /test.html',
'Allow: /fish/test.html',
'Allow: /test.html',
'',
'User-agent: a',
'allow: /',
'',
'User-agent: b',
'disallow: /test',
'disallow: /t*t',
'',
'User-agent: c',
'Disallow: /fish*.php',
'Allow: /fish/index.php'
].join('\n');
var robots = robotsParser('http://www.example.com/robots.txt', contents);
expect(robots.getMatchingLineNumber('http://www.example.com/fish')).to.equal(-1);
expect(robots.getMatchingLineNumber('http://www.example.com/fish/test.html')).to.equal(6);
expect(robots.getMatchingLineNumber('http://www.example.com/Test.html')).to.equal(-1);
expect(robots.getMatchingLineNumber('http://www.example.com/fish/index.php')).to.equal(4);
expect(robots.getMatchingLineNumber('http://www.example.com/fish/')).to.equal(4);
expect(robots.getMatchingLineNumber('http://www.example.com/test.html')).to.equal(7);
expect(robots.getMatchingLineNumber('http://www.example.com/test.html', 'a')).to.equal(10);
expect(robots.getMatchingLineNumber('http://www.example.com/fish.php', 'c')).to.equal(17);
expect(robots.getMatchingLineNumber('http://www.example.com/fish/index.php', 'c')).to.equal(18);
});
it('should handle large wildcards efficiently', function () {
var contents = [
'User-agent: *',
'Disallow: /' + '*'.repeat(2048) + '.html',
].join('\n');
var allowed = [
'http://www.example.com/' + 'sub'.repeat(2048) + 'folder/index.php',
];
var disallowed = [
'http://www.example.com/secret.html'
];
const start = Date.now();
testRobots('http://www.eXample.com/robots.txt', contents, allowed, disallowed);
const end = Date.now();
// Should take less than 500 ms (high to allow for variableness of
// machines running the test, should normally be much less)
expect(end - start).to.be.lessThan(500);
});
it('should honor given port number', function () {
  var robotsTxt = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  // Only URLs on the same port as the robots.txt (8080) are in scope;
  // everything on another (or the implicit default) port is rejected.
  var allowedUrls = [
    'http://www.example.com:8080/fish',
    'http://www.example.com:8080/Test.html'
  ];
  var disallowedUrls = [
    'http://www.example.com/fish',
    'http://www.example.com/Test.html',
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html'
  ];

  testRobots('http://www.example.com:8080/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});
it('should default to port 80 for http: if no port given', function () {
  var robotsTxt = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  // The portless http: robots URL is treated as port 80, so explicit
  // ":80" URLs are in scope while ":443" URLs are not.
  var allowedUrls = [
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html'
  ];
  var disallowedUrls = [
    'http://www.example.com:443/fish',
    'http://www.example.com:443/Test.html',
    'http://www.example.com:80/fish/index.php',
    'http://www.example.com:80/fish/',
    'http://www.example.com:80/test.html'
  ];

  testRobots('http://www.example.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});
it('should default to port 443 for https: if no port given', function () {
  var robotsTxt = [
    'User-agent: *',
    'Disallow: /fish/',
    'Disallow: /test.html'
  ].join('\n');

  // The portless https: robots URL is treated as port 443; URLs with and
  // without an explicit ":443" are therefore equivalent.
  var allowedUrls = [
    'https://www.example.com:443/fish',
    'https://www.example.com:443/Test.html',
    'https://www.example.com/fish',
    'https://www.example.com/Test.html'
  ];
  var disallowedUrls = [
    'http://www.example.com:80/fish',
    'http://www.example.com:80/Test.html',
    'http://www.example.com:443/fish/index.php',
    'http://www.example.com:443/fish/',
    'http://www.example.com:443/test.html'
  ];

  testRobots('https://www.example.com/robots.txt', robotsTxt, allowedUrls, disallowedUrls);
});
it('should not be disallowed when wildcard is used in explicit mode', function () {
  var contents = [
    'User-agent: *',
    'Disallow: /',
  ].join('\n');
  var url = 'https://www.example.com/hello';
  var userAgent = 'SomeBot';
  var robots = robotsParser(url, contents);
  // Explicit mode ignores rules inherited from the wildcard (*) group:
  // only a group addressing the given user agent directly may disallow it.
  expect(robots.isExplicitlyDisallowed(url, userAgent)).to.equal(false);
});
it('should be disallowed when user agent equal robots rule in explicit mode', function () {
  var contents = [
    'User-agent: SomeBot',
    'Disallow: /',
  ].join('\n');
  var url = 'https://www.example.com/hello';
  var userAgent = 'SomeBot';
  var robots = robotsParser(url, contents);
  // The group names this user agent directly, so the explicit check applies.
  expect(robots.isExplicitlyDisallowed(url, userAgent)).to.equal(true);
});
it('should return undefined when given an invalid URL in explicit mode', function () {
  var contents = [
    'User-agent: SomeBot',
    'Disallow: /',
  ].join('\n');
  var url = 'https://www.example.com/hello';
  var userAgent = 'SomeBot';
  // The parser is bound to http://example.com, so the checked URL is not
  // valid for this robots.txt (different scheme/host) and the result is
  // indeterminate (undefined) rather than true/false.
  var robots = robotsParser('http://example.com', contents);
  expect(robots.isExplicitlyDisallowed(url, userAgent)).to.equal(undefined);
});
});

View file

@ -11,7 +11,7 @@ clean {
}
dependencies {
implementation 'com.h2database:h2:1.4.200'
implementation 'com.h2database:h2:2.2.220'
}
configurations {
@ -112,9 +112,8 @@ tasks.register('installJars', Copy) {
tasks.register('buildDatabase', JavaExec) {
description 'Builds the H2 SQL database file.'
group 'build'
dependsOn ':assemble'
def inputFile = "$antvilleInstallDir/db/postgre.sql"
def inputFile = "${project.rootDir}/db/postgre.sql"
def outputDir = "$helmaInstallDir/db"
inputs.file inputFile
@ -130,7 +129,7 @@ tasks.register('buildDatabase', JavaExec) {
args = [
'-continueOnError',
'-script', inputFile,
'-url', "jdbc:h2:$outputDir/antville",
'-url', "jdbc:h2:$outputDir/antville;mode=postgresql;non_keywords=value",
'-user', 'antville',
'-password', 'antville'
]
@ -165,7 +164,7 @@ task assembleDist {
tasks.register('runH2Console', JavaExec) {
description 'Runs the H2 SQL database console.'
group 'Help'
dependsOn 'installJars'
dependsOn 'buildDatabase'
def h2Jar = configurations.library.files.find { jar ->
jar.name.startsWith('h2')
@ -178,9 +177,6 @@ tasks.register('runH2Console', JavaExec) {
'-user', 'antville',
'-password', 'antville'
]
// standardOutput = new ByteArrayOutputStream()
// ignoreExitValue true
}
tasks.register('distZip', Zip) {

View file

@ -7,3 +7,8 @@
summary {
display: revert;
}
/* TODO: Actually a compatibility fix. See https://tobi.antville.org/stories/2317931/
   NOTE(review): "//" line comments are not valid in plain CSS — error recovery
   could swallow the following rule. Using a standard comment instead. */
td[nowrap][width]:not([width$="%"]) {
  white-space: normal;
}