Add routine to enforce rules in robots.txt if corresponding setting is enabled

This commit is contained in:
Tobi Schäfer 2025-05-10 21:59:52 +02:00 committed by Antville Git bot
parent c5b9a613a8
commit 42a0d7e610
2 changed files with 27 additions and 0 deletions

View file

@@ -140,10 +140,18 @@ HopObject.prototype.onRequest = function() {
}
}
// Set up layout handler and skin path
HopObject.confirmConstructor(Layout);
res.handlers.layout = res.handlers.site.layout || new Layout;
res.skinpath = res.handlers.layout.getSkinPath();
if (res.handlers.site.enforceRobotsTxt()) {
res.status = 403
res.data.error = "Robots.txt forbids access to this page.";
root.error_action();
res.stop();
}
if (!this.getPermission(req.action)) {
if (!session.user) {
User.setLocation(root.href() + req.path);

View file

@@ -1132,3 +1132,22 @@ Site.prototype.callback = function(ref) {
}
return;
}
/**
 * Decides whether the current request must be blocked per the site's
 * robots.txt rules. Only active when the site's robotsTxtMode equals
 * Site.ENFORCED; otherwise access is never restricted by this routine.
 * The rendered robots.txt is augmented with override rules so the site
 * cannot lock out its own owner (edit, layout, main and members pages
 * stay reachable for browser user-agents).
 * @returns {Boolean} True if robots.txt forbids the requested path for
 *     the requesting user-agent; false otherwise.
 */
Site.prototype.enforceRobotsTxt = function() {
  if (this.robotsTxtMode !== Site.ENFORCED) {
    return false;
  }
  // Override some patterns to prevent a site from becoming inaccessible even for the owner
  const overrides = [
    'User-agent: mozilla',
    'Allow: */edit$',
    'Allow: */layout',
    'Allow: */main.*$',
    'Allow: */members'
  ];
  const robotsTxt = root.renderSkinAsString('Site#robots');
  // Separate the rendered skin from the overrides with an explicit newline;
  // otherwise, if the skin output lacks a trailing newline, the first
  // override would be fused onto its last line and all overrides ignored.
  const robots = new Robots(this.href('robots.txt'), robotsTxt + '\n' + overrides.join('\n'));
  return !robots.isAllowed(path.href() + req.action, req.getHeader('user-agent'));
}