From d1710fc1397f990deda2c36ff4baf115bd349b27 Mon Sep 17 00:00:00 2001
From: Christoph Rueger <chrisrueger@gmail.com>
Date: Sat, 15 Feb 2025 09:38:45 +0100
Subject: [PATCH] docs: exclude /releases urls from search engines

Everything under URLs like https://bnd.bndtools.org/releases/ is
excluded from sitemap.xml and disallowed via robots.txt, so that
Google (hopefully) does not crawl those pages.

Why? Pages such as
https://bnd.bndtools.org/releases/4.0.0/instructions/sub.html contain
outdated information from previous releases. Those pages dilute the
Google search results, and sometimes an outdated page comes up first.
It is better for bnd / bndtools when the current documentation is
found first.

Signed-off-by: Christoph Rueger <chrisrueger@gmail.com>
---
 docs/_config.yml | 5 +++++
 docs/robots.txt  | 7 +++++++
 2 files changed, 12 insertions(+)
 create mode 100644 docs/robots.txt

diff --git a/docs/_config.yml b/docs/_config.yml
index 0b28991a6b..8cebbba0a3 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -32,6 +32,11 @@ defaults:
       path: ""
       values:
         layout: default
+  # exclude all files under `/releases/` from the sitemap
+  - scope:
+      path: releases/**
+      values:
+        sitemap: false
 
 exclude:
 - 'ADDING_RELEASE_DOCS.md'

diff --git a/docs/robots.txt b/docs/robots.txt
new file mode 100644
index 0000000000..10ba859b73
--- /dev/null
+++ b/docs/robots.txt
@@ -0,0 +1,7 @@
+# This robots.txt file controls crawling of URLs under https://bnd.bndtools.org.
+# All crawlers are disallowed from the "releases" directory, because those
+# pages contain outdated information that dilutes search results.
+User-agent: *
+Disallow: /releases/
+
+Sitemap: https://bnd.bndtools.org/sitemap.xml
\ No newline at end of file
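
For local verification, a minimal sketch (the script name and the
_site/ path are assumptions: jekyll-sitemap writes sitemap.xml into
Jekyll's default _site/ output directory after `bundle exec jekyll
build`):

    # check_sitemap.py - hypothetical helper, not part of the patch.
    # Verifies that no /releases/ URL appears in the locally built sitemap.
    import xml.etree.ElementTree as ET

    # Standard sitemap XML namespace used by jekyll-sitemap output.
    NS = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"}

    tree = ET.parse("_site/sitemap.xml")
    offending = [
        loc.text
        for loc in tree.getroot().findall("sm:url/sm:loc", NS)
        if loc.text and "/releases/" in loc.text
    ]
    assert not offending, f"sitemap still lists release pages: {offending}"
    print("OK: no /releases/ URLs in sitemap.xml")

Note that Disallow in robots.txt only stops compliant crawlers from
fetching the pages; removing them from sitemap.xml as well avoids
advertising those URLs to search engines in the first place.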