diff --git a/docs/user/reference/sitemaps.rst b/docs/user/reference/sitemaps.rst
index 85d9e50e8d3..9e89a5c9f00 100644
--- a/docs/user/reference/sitemaps.rst
+++ b/docs/user/reference/sitemaps.rst
@@ -16,12 +16,18 @@ It contains information such as:
 * How important this URL is in relation to other URLs in the site.
 * What translations are available for a page.
 
-Read the Docs automatically generates a ``sitemap.xml`` for your project,
+Read the Docs automatically generates a ``sitemap.xml`` and a
+``sitemap_index.xml`` for your project.
+
+By default the sitemap index includes:
+
+* The location of the project's ``sitemap.xml``.
+* The location of the ``sitemap.xml`` of each subproject, if the project has any.
 
 By default the sitemap includes:
 
 * Each version of your documentation and when it was last updated, sorted by version number.
 
 This allows search engines to prioritize results based on the version number,
 sorted by `semantic versioning`_.
 
diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py
index 6d60009b9aa..e82dd582c2b 100644
--- a/readthedocs/proxito/tests/test_full.py
+++ b/readthedocs/proxito/tests/test_full.py
@@ -850,7 +850,7 @@ def test_default_robots_txt(self, storage_exists):
 
             Disallow: # Allow everything
 
-            Sitemap: https://project.readthedocs.io/sitemap.xml
+            Sitemap: https://project.readthedocs.io/sitemap_index.xml
             """
         ).lstrip()
         self.assertEqual(response.content.decode(), expected)
@@ -904,7 +904,7 @@ def test_default_robots_txt_disallow_hidden_versions(self, storage_exists):
 
             Disallow: /en/hidden/ # Hidden version
 
-            Sitemap: https://project.readthedocs.io/sitemap.xml
+            Sitemap: https://project.readthedocs.io/sitemap_index.xml
             """
         ).lstrip()
         self.assertEqual(response.content.decode(), expected)
@@ -1733,6 +1733,55 @@ def test_sitemap_all_private_versions(self):
         )
         self.assertEqual(response.status_code, 404)
 
+    def test_sitemap_subproject(self):
+        self.project.versions.update(active=True)
+        self.subproject.versions.update(active=True)
+
+        subresponse = self.client.get(
+            reverse("sitemap_xml", args=["subproject"]),
+            headers={"host": "project.readthedocs.io"},
+        )
+        response = self.client.get(
+            reverse("sitemap_xml"), headers={"host": "subproject.readthedocs.io"}
+        )
+
+        self.assertEqual(subresponse.status_code, 200)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(subresponse.content, response.content)
+
+    def test_sitemap_index(self):
+        self.project.versions.update(active=True)
+        response = self.client.get(
+            reverse("sitemap_index_xml"), headers={"host": "project.readthedocs.io"}
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response["Content-Type"], "application/xml")
+
+        # Subprojects are listed under their relationship alias, which is the
+        # key ``ServeSitemapXML`` resolves for ``/projects/<alias>/sitemap.xml``.
+        alias = self.project.subprojects.get(child__slug="subproject-alias").alias
+        expected = dedent(
+            f"""
+            <?xml version="1.0" encoding="UTF-8"?>
+            <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+
+              <sitemap>
+                <loc>https://project.readthedocs.io/sitemap.xml</loc>
+              </sitemap>
+
+              <sitemap>
+                <loc>https://project.readthedocs.io/projects/subproject/sitemap.xml</loc>
+              </sitemap>
+
+              <sitemap>
+                <loc>https://project.readthedocs.io/projects/{alias}/sitemap.xml</loc>
+              </sitemap>
+
+            </sitemapindex>
+            """
+        ).lstrip()
+        self.assertEqual(response.content.decode(), expected)
+
     @mock.patch(
         "readthedocs.proxito.views.mixins.staticfiles_storage",
         new=StaticFileSystemStorageTest(),
diff --git a/readthedocs/proxito/urls.py b/readthedocs/proxito/urls.py
index 0dc1338037c..40687117955 100644
--- a/readthedocs/proxito/urls.py
+++ b/readthedocs/proxito/urls.py
@@ -46,6 +46,7 @@
     ServeError404,
     ServePageRedirect,
     ServeRobotsTXT,
+    ServeSitemapIndexXML,
     ServeSitemapXML,
     ServeStaticFiles,
 )
@@ -136,7 +137,17 @@
         name="proxito_404_handler",
     ),
     re_path(r"robots\.txt$", ServeRobotsTXT.as_view(), name="robots_txt"),
-    re_path(r"sitemap\.xml$", ServeSitemapXML.as_view(), name="sitemap_xml"),
+    re_path(
+        r"^(?:projects/(?P<subproject_slug>{project_slug})/)?"
+        r"sitemap\.xml$".format(**pattern_opts),
+        ServeSitemapXML.as_view(),
+        name="sitemap_xml",
+    ),
+    re_path(
+        r"^sitemap_index\.xml$",
+        ServeSitemapIndexXML.as_view(),
+        name="sitemap_index_xml",
+    ),
 ]
 
 docs_urls = [
diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py
index 4f44d51fa07..13656cafac4 100644
--- a/readthedocs/proxito/views/serve.py
+++ b/readthedocs/proxito/views/serve.py
@@ -763,8 +763,8 @@ def get(self, request):
             pass
 
         # Serve default robots.txt
-        sitemap_url = "{scheme}://{domain}/sitemap.xml".format(
+        sitemap_url = "{scheme}://{domain}/sitemap_index.xml".format(
             scheme="https",
             domain=project.subdomain(),
         )
         context = {
@@ -813,7 +813,7 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
     # Extra cache tag to invalidate only this view if needed.
     project_cache_tag = "sitemap.xml"
 
-    def get(self, request):
+    def get(self, request, subproject_slug=None):
         """
         Generate and serve a ``sitemap.xml`` for a particular ``project``.
 
@@ -872,6 +872,12 @@ def changefreqs_generator():
             yield from itertools.chain(changefreqs, itertools.repeat("monthly"))
 
         project = request.unresolved_domain.project
+
+        if subproject_slug:
+            project = get_object_or_404(
+                project.subprojects, alias=subproject_slug
+            ).child
+
         public_versions = Version.internal.public(
             project=project,
             only_active=True,
@@ -970,6 +976,59 @@ class ServeSitemapXML(SettingsOverrideObject):
     _default_class = ServeSitemapXMLBase
 
 
+class ServeSitemapIndexXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
+
+    """Serve sitemap_index.xml from the domain's root."""
+
+    cache_response = True
+    project_cache_tag = "sitemap.xml"
+
+    def get(self, request):
+        """
+        Generate and serve a ``sitemap_index.xml`` for a ``project``.
+
+        The index lists the project's own ``sitemap.xml`` followed by the
+        ``sitemap.xml`` of each subproject. Subprojects are referenced by
+        their relationship alias, because that is the key ``ServeSitemapXML``
+        uses to resolve ``/projects/<alias>/sitemap.xml``.
+        """
+        project = request.unresolved_domain.project
+        base_url = "{scheme}://{domain}".format(
+            scheme="https",
+            domain=project.subdomain(),
+        )
+
+        locations = ["{base_url}/sitemap.xml".format(base_url=base_url)]
+        locations.extend(
+            "{base_url}/projects/{alias}/sitemap.xml".format(
+                base_url=base_url,
+                alias=relationship.alias,
+            )
+            for relationship in project.subprojects.all()
+        )
+        context = {
+            "locations": locations,
+        }
+        return render(
+            request,
+            "sitemap_index.xml",
+            context,
+            content_type="application/xml",
+        )
+
+    def _get_project(self):
+        # Method used by the CDNCacheTagsMixin class.
+        return self.request.unresolved_domain.project
+
+    def _get_version(self):
+        # This view isn't attached to a version.
+        return None
+
+
+class ServeSitemapIndexXML(SettingsOverrideObject):
+    _default_class = ServeSitemapIndexXMLBase
+
+
 class ServeStaticFiles(CDNCacheControlMixin, CDNCacheTagsMixin, ServeDocsMixin, View):
 
     """
diff --git a/readthedocs/templates/sitemap_index.xml b/readthedocs/templates/sitemap_index.xml
new file mode 100644
index 00000000000..c248d2820fa
--- /dev/null
+++ b/readthedocs/templates/sitemap_index.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+{% for loc in locations %}
+  <sitemap>
+    <loc>{{ loc }}</loc>
+  </sitemap>
+{% endfor %}
+</sitemapindex>