From 42a0d7e610f6b81f9f43052a57806fc684b50013 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20Sch=C3=A4fer?=
Date: Sat, 10 May 2025 21:59:52 +0200
Subject: [PATCH] Add routine to enforce rules in robots.txt if corresponding setting is enabled

---
 code/HopObject/HopObject.js |  8 ++++++++
 code/Site/Site.js           | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/code/HopObject/HopObject.js b/code/HopObject/HopObject.js
index 9e35a00d..f2a2782a 100644
--- a/code/HopObject/HopObject.js
+++ b/code/HopObject/HopObject.js
@@ -140,10 +140,18 @@ HopObject.prototype.onRequest = function() {
     }
   }
 
+  // Set up layout handler and skin path
   HopObject.confirmConstructor(Layout);
   res.handlers.layout = res.handlers.site.layout || new Layout;
   res.skinpath = res.handlers.layout.getSkinPath();
 
+  if (res.handlers.site.enforceRobotsTxt()) {
+    res.status = 403;
+    res.data.error = "Robots.txt forbids access to this page.";
+    root.error_action();
+    res.stop();
+  }
+
   if (!this.getPermission(req.action)) {
     if (!session.user) {
       User.setLocation(root.href() + req.path);
diff --git a/code/Site/Site.js b/code/Site/Site.js
index dd8be05a..92bcea8e 100644
--- a/code/Site/Site.js
+++ b/code/Site/Site.js
@@ -1132,3 +1132,28 @@ Site.prototype.callback = function(ref) {
   }
   return;
 }
+
+/**
+ * Checks the current request against the rendered robots.txt rules of the site.
+ * Always returns false unless the site's `robotsTxtMode` equals `Site.ENFORCED`.
+ * @returns {Boolean} The negated result of `robots.isAllowed()` for the requested URL and user agent
+ */
+Site.prototype.enforceRobotsTxt = function() {
+  if (this.robotsTxtMode !== Site.ENFORCED) {
+    return false;
+  }
+
+  // Override some patterns to prevent a site from becoming inaccessible even for the owner
+  const overrides = [
+    'User-agent: mozilla',
+    'Allow: */edit$',
+    'Allow: */layout',
+    'Allow: */main.*$',
+    'Allow: */members'
+  ];
+
+  const robotsTxt = root.renderSkinAsString('Site#robots');
+  // The explicit line break keeps the last rendered rule from fusing with the first override line
+  const robots = new Robots(this.href('robots.txt'), robotsTxt + '\n' + overrides.join('\n'));
+  return !robots.isAllowed(path.href() + req.action, req.getHeader('user-agent'));
+}