diff --git a/NEWS.md b/NEWS.md
index 351f7a6..9570cc2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -13,12 +13,10 @@
 * `ragnar_read()` and `read_as_markdown()` now accept paths
   that begin with `~` (@topepo, #46, #48).
 
-* Changes to `read_as_markdown()` HTML conversion (#40):
+* Changes to `read_as_markdown()` HTML conversion (#40, #51):
 
-  * If a 'main' tag is present, content outside the 'main' tag is now excluded
-    by default. To restore the previous behavior and include the sidebar, header,
-    footer, and other navigational elements in the converted markdown, use
-    `read_as_markdown(x, main_only=FALSE)`.
+  * New arguments `html_extract_selectors` and `html_zap_selectors` provide a
+    flexible way to exclude some HTML page elements from the converted markdown.
 
   * Fixed handling of nested code fences in markdown output.
 
diff --git a/R/read-markdown.R b/R/read-markdown.R
index 7df8447..c5e1c6e 100644
--- a/R/read-markdown.R
+++ b/R/read-markdown.R
@@ -1,37 +1,100 @@
 #' Convert files to markdown
 #'
-#' @param x A filepath or url. Accepts a wide variety of file types, including
-#'   PDF, PowerPoint, Word, Excel, Images (EXIF metadata and OCR), Audio (EXIF
-#'   metadata and speech transcription), HTML, Text-based formats (CSV, JSON,
-#'   XML), ZIP files (iterates over contents), Youtube URLs, and EPubs.
-#' @param ... Passed on to `MarkItDown.convert()`
-#' @param canonical logical, whether to postprocess the output from MarkItDown
+#' @param x A filepath or URL. Accepts a wide variety of file types, including
+#'   PDF, PowerPoint, Word, Excel, images (EXIF metadata and OCR), audio (EXIF
+#'   metadata and speech transcription), HTML, text-based formats (CSV, JSON,
+#'   XML), ZIP files (iterates over contents), YouTube URLs, and EPUBs.
+#' @param ... Passed on to `MarkItDown.convert()`.
+#' @param canonical Logical. Whether to postprocess the output from MarkItDown
 #'   with `commonmark::markdown_commonmark()`.
-#' @param main_only logical. Applies only to HTML documents. If `TRUE` and a
-#'   `main` tag is present in the document, only the contents of the `main` tag
-#'   are returned. This is a convenient way to exclude navigational elements
-#'   typically found in sidebars, page headers, and footers.
-#'
-#' @returns A single string of markdown
+#' @param html_extract_selectors Character vector of CSS selectors. If a match
+#'   for a selector is found in the document, only the matched node's contents
+#'   are converted. Unmatched extract selectors have no effect.
+#' @param html_zap_selectors Character vector of CSS selectors. Elements
+#'   matching these selectors will be excluded ("zapped") from the HTML document
+#'   before conversion to markdown. This is useful for removing navigation bars,
+#'   sidebars, headers, footers, or other unwanted elements. By default,
+#'   navigation elements (`nav`) are excluded.
+#'
+#' @returns A single string of markdown.
 #' @export
 #'
 #' @examplesIf reticulate::py_available()
-#' # convert html
+#' # Convert HTML
 #' read_as_markdown("https://r4ds.hadley.nz/base-R.html") |>
-#'   substr(1, 1000) |>
+#'   substr(1, 500) |>
 #'   cat()
 #'
 #' read_as_markdown("https://r4ds.hadley.nz/base-R.html", canonical = TRUE) |>
-#'   substr(1, 1000) |>
+#'   substr(1, 500) |>
 #'   cat()
 #'
-#' # convert pdf
+#' # When converting HTML, you might want to omit certain elements, like
+#' # sidebars, headers, footers, etc. You can pass CSS selector strings
+#' # to either extract nodes or exclude nodes during conversion.
+#' #
+#' # The easiest way to make selectors is to use SelectorGadget:
+#' # https://rvest.tidyverse.org/articles/selectorgadget.html
+#' #
+#' # You can also right-click on a page and select "Inspect Element" in a
+#' # browser to better understand an HTML page's structure.
+#' #
+#' # For comprehensive or advanced usage of CSS selectors, consult:
+#' # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#css-selectors-through-the-css-property
+#' # https://facelessuser.github.io/soupsieve/selectors/
+#'
+#' url <- "https://duckdb.org/code_of_conduct"
+#' # Includes the sidebar and other navigational elements
+#' read_as_markdown(url) |> substr(1, 500) |> writeLines()
+#'
+#' # Extract the main content
+#' read_as_markdown(url, html_extract_selectors = "#main_content_wrap")
+#'
+#' # Alternative approach: exclude nodes
+#' read_as_markdown(
+#'   url,
+#'   html_zap_selectors = c(
+#'     "header",          # node name
+#'     ".sidenavigation", # node class
+#'     ".searchoverlay",  # node class
+#'     "#sidebar"         # node ID
+#'   )
+#' ) |> substr(1, 500) |> writeLines()
+#'
+#' # Quarto example
+#' url <- "https://quarto.org/docs/computations/python.html"
+#'
+#' # Include sidebar, footer, etc.
+#' read_as_markdown(
+#'   url,
+#'   html_extract_selectors = NULL,
+#'   html_zap_selectors = NULL
+#' ) |> substr(1, 500) |> writeLines()
+#'
+#' # Exclude content outside <main>
+#' read_as_markdown(url, html_extract_selectors = "main") |>
+#'   substr(1, 500) |> writeLines()
+#'
+#' # Exclude specific matching nodes
+#' read_as_markdown(
+#'   url,
+#'   html_extract_selectors = NULL,
+#'   html_zap_selectors = c(
+#'     "#quarto-sidebar",
+#'     "#quarto-margin-sidebar",
+#'     "header",
+#'     "footer",
+#'     "nav"
+#'   )
+#' ) |> substr(1, 500) |> writeLines()
+#'
+#' # Convert PDF
 #' pdf <- file.path(R.home("doc"), "NEWS.pdf")
 #' read_as_markdown(pdf) |> substr(1, 1000) |> cat()
-#' ## alternative:
+#' ## Alternative:
 #' # pdftools::pdf_text(pdf) |> substr(1, 2000) |> cat()
 #'
-#' # convert images to markdown descriptions using OpenAI
+#' # Convert images to markdown descriptions using OpenAI
 #' jpg <- file.path(R.home("doc"), "html", "logo.jpg")
 #' if (Sys.getenv("OPENAI_API_KEY") != "") {
 #'   # if (xfun::is_macos()) system("brew install ffmpeg")
@@ -56,7 +119,13 @@
 #'   chat <- ellmer::chat_openai(echo = TRUE)
 #'   chat$chat("Describe this image", ellmer::content_image_file(jpg))
 #' }
-read_as_markdown <- function(x, ..., canonical = FALSE, main_only = TRUE) {
+read_as_markdown <- function(
+  x,
+  ...,
+  canonical = FALSE,
+  html_extract_selectors = c("main"),
+  html_zap_selectors = c("nav")
+) {
   check_string(x)
   if (startsWith(x, "~")) {
     x <- path.expand(x)
@@ -68,40 +137,18 @@ read_as_markdown <- function(x, ..., canonical = FALSE, main_only = TRUE) {
     # dependencies that conflict
     md <- ragnartools.markitdown$convert_to_markdown(
       x,
+      html_extract_selectors = html_extract_selectors,
+      html_zap_selectors = html_zap_selectors,
       ...,
-      main_only = main_only
     )
   } else {
-    # use the markitdown cli API, (much) slower, but can be isolated from
-    # reticulated python.
-    # TODO: apply markitdown monkeypatches in cli interface too
-
-    check_dots_empty()
-    outfile <- withr::local_tempfile(fileext = ".md")
-    exit_code <- cli_markitdown(c(shQuote(x), "-o", shQuote(outfile)))
-    if (
-      !identical(exit_code, 0L) ||
-        (no_outfile_produced <- !file.exists(outfile))
-    ) {
-      # more useful output to stderr() should have been printed
-      # already by cli_markitdown() if we are here.
-      errmsg <- stri_flatten(
-        c(
-          paste("markitdown exit code: ", exit_code),
-          if (no_outfile_produced) "No output file produced."
-        ),
-        collapse = "\n"
-      )
-      stop(errmsg)
-    }
-
-    md <- stri_read_lines(outfile)
+    md <- read_as_markdown_cli(x, ...)
   }
 
   md <- stri_replace_all_fixed(md, "\f", "\n\n---\n\n")
   md <- unlist(stri_split_lines(md)) # normalize newlines
   md <- stri_trim_right(md)
-  if (canonical)
+  if (canonical) {
     md <- commonmark::markdown_commonmark(
       md,
       normalize = TRUE,
@@ -109,6 +156,7 @@ read_as_markdown <- function(x, ..., canonical = FALSE, main_only = TRUE) {
       width = 72L,
       extensions = TRUE
     )
+  }
   md <- stri_flatten(md, "\n")
   glue::as_glue(md)
 }
@@ -118,8 +166,9 @@ markdown_locate_boundaries_bytes_index <- function(text, tags = NULL) {
   lines <- text |> stri_split_lines() |> unlist()
   text <- lines |> stri_flatten("\n")
 
-  if (text == "")
+  if (text == "") {
     return(data_frame(tag = character(), start = integer(), end = integer()))
+  }
 
   doc <- text |>
     commonmark::markdown_html(
@@ -256,7 +305,9 @@ markdown_segment <- function(
   sizes <- drop_first(boundaries) - drop_last(boundaries)
   splits <- vec_chop(bytes, sizes = sizes) |> vapply(rawToChar, "")
 
-  if (trim) splits <- stri_trim_both(splits) # drops names
+  if (trim) {
+    splits <- stri_trim_both(splits) # drops names
+  }
 
   # make names
   split_tags <- c("", sourcepos$tag[match(tag_boundaries, sourcepos$start)])
@@ -294,8 +345,9 @@ markdown_frame <- function(
     names = "tag",
     leaves = "text"
   )
-  if (!length(segment_by) || base::setequal(segment_by, frame_by))
+  if (!length(segment_by) || base::setequal(segment_by, frame_by)) {
     frame[["tag"]] <- NULL
+  }
   as_tibble(frame)
 }
 
@@ -454,9 +506,39 @@ ragnar_read <- function(x, ..., split_by_tags = NULL, frame_by_tags = NULL) {
 
 # ------ utils
 
+read_as_markdown_cli <- function(x, ...) {
+  # Use the markitdown CLI: (much) slower, but it can be isolated from
+  # reticulated Python.
+  # TODO: apply markitdown monkeypatches in cli interface too
+
+  check_dots_empty()
+  outfile <- withr::local_tempfile(fileext = ".md")
+  exit_code <- cli_markitdown(c(shQuote(x), "-o", shQuote(outfile)))
+  if (
+    !identical(exit_code, 0L) ||
+      (no_outfile_produced <- !file.exists(outfile))
+  ) {
+    # more useful output to stderr() should have been printed
+    # already by cli_markitdown() if we are here.
+    errmsg <- stri_flatten(
+      c(
+        paste("markitdown exit code: ", exit_code),
+        if (no_outfile_produced) "No output file produced."
+      ),
+      collapse = "\n"
+    )
+    stop(errmsg)
+  }
+
+  md <- stri_read_lines(outfile)
+  md
+}
+
+
 cli_markitdown <- function(args, ...) {
-  if (is.na(Sys.getenv("PYTHONIOENCODING", NA)))
-    withr::local_envvar("PYTHONIOENCODING" = "utf-8") # needed on windows
+  if (is.na(Sys.getenv("PYTHONIOENCODING", NA))) {
+    withr::local_envvar("PYTHONIOENCODING" = "utf-8") # needed on windows
+  }
 
   reticulate::uv_run_tool(
     "markitdown",
diff --git a/inst/python/_ragnartools/markitdown.py b/inst/python/_ragnartools/markitdown.py
index 979026a..bf3cbab 100644
--- a/inst/python/_ragnartools/markitdown.py
+++ b/inst/python/_ragnartools/markitdown.py
@@ -1,11 +1,8 @@
-import functools
 import markitdown
 from markitdown.converters._markdownify import _CustomMarkdownify
 
 md = markitdown.MarkItDown()
 
-MISSING = object()
-
 
 def maybe_expand_outer_code_fence(text):
     # take a 'pre' string like this:
@@ -45,63 +42,85 @@ def maybe_expand_outer_code_fence(text):
     return text
 
 
-def fence_main(text):
-    return f"____RAGNAR_MAIN_START____{text}____RAGNAR_MAIN_END____"
-
-
 class patched_markitdown:
-    def __init__(self, patch_pre=True, patch_main=True):
-        self.patch_main = patch_main
-        self.patch_pre = patch_pre
+    def __init__(
+        self,
+        html_extract_selectors=None,
+        html_zap_selectors=None,
+    ):
+        self.html_extract_selectors = html_extract_selectors or []
+        self.html_zap_selectors = html_zap_selectors or []
 
     def __enter__(self):
-        if self.patch_pre:
-            self.og_convert_pre = og_convert_pre = _CustomMarkdownify.convert_pre
+        self.og_convert_soup = og_convert_soup = _CustomMarkdownify.convert_soup
 
-            def convert_pre(self, el, text, parent_tags):
-                text = og_convert_pre(self, el, text, parent_tags)
-                return maybe_expand_outer_code_fence(text)
+        def convert_soup(self_, soup):
 
-            _CustomMarkdownify.convert_pre = convert_pre
+            for selector in self.html_extract_selectors:
+                if (tag := soup.select_one(selector)) is not None:
 
-        if self.patch_main:
-            self.og_convert_main = og_convert_main = getattr(
-                _CustomMarkdownify, "convert_main", MISSING
-            )
-            if og_convert_main is MISSING or None:
+                    soup = tag.extract()
 
-                def convert_main(self, el, text, parent_tags):
-                    return fence_main(text)
+            for selector in self.html_zap_selectors:
+                while (tag := soup.select_one(selector)) is not None:
+                    tag.decompose()
 
-            else:
+            return og_convert_soup(self_, soup)
 
-                def convert_main(self, el, text, parent_tags):
-                    text = og_convert_main(self, el, text, parent_tags)
-                    return fence_main(text)
+        _CustomMarkdownify.convert_soup = convert_soup
 
-            _CustomMarkdownify.convert_main = convert_main
+        self.og_convert_pre = og_convert_pre = _CustomMarkdownify.convert_pre
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        _CustomMarkdownify.convert_pre = self.og_convert_pre
-        if self.patch_main:
-            if self.og_convert_main is MISSING:
-                delattr(_CustomMarkdownify, "convert_main")
-            else:
-                _CustomMarkdownify.convert_main = self.og_convert_main
+        def convert_pre(self, el, text, parent_tags):
+            text = og_convert_pre(self, el, text, parent_tags)
+            return maybe_expand_outer_code_fence(text)
 
+        _CustomMarkdownify.convert_pre = convert_pre
 
-def convert_to_markdown(x, *args, main_only=True, **kwargs):
-    with patched_markitdown(patch_main=main_only):
-        result = md.convert(x, *args, **kwargs)
-        text = result.markdown
-
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        _CustomMarkdownify.convert_pre = self.og_convert_pre
+        _CustomMarkdownify.convert_soup = self.og_convert_soup
+
+
+def as_str_list(x):
+    if x is None:
+        return []
+    if isinstance(x, str):
+        return [x]
+    return list(x)
+
+
+def convert_to_markdown(
+    x,
+    *args,
+    html_extract_selectors=None,
+    html_zap_selectors=None,
+    **kwargs,
+):
+    html_extract_selectors = as_str_list(html_extract_selectors)
+    html_zap_selectors = as_str_list(html_zap_selectors)
+
+    # backcompat support for previous 'main_only' arg
+    main_only = kwargs.pop("main_only", None)
+    if main_only is not None:
         if main_only:
-            start = text.find("____RAGNAR_MAIN_START____")
-            end = text.rfind("____RAGNAR_MAIN_END____")
-            if start != -1 and end != -1:
-                text = text[start + len("____RAGNAR_MAIN_START____") : end]
+            if "main" not in html_extract_selectors:
+                html_extract_selectors.insert(0, "main")
+        else:
+            html_extract_selectors = [s for s in html_extract_selectors if s != "main"]
+
+    with patched_markitdown(
+        html_extract_selectors=html_extract_selectors,
+        html_zap_selectors=html_zap_selectors,
+    ):
+        result = md.convert(x, *args, **kwargs)
+        text = result.markdown.strip()
 
         if result.title is not None:
-            text = f"# {result.title}\n\n{text}"
+            title = f"# {result.title}"
+            if not text.startswith(title):
+                text = f"{title}\n\n{text}"
+                
+        text = text.replace("\f", "\n\n---\n\n")
 
         return text
diff --git a/man/read_as_markdown.Rd b/man/read_as_markdown.Rd
index a996193..70141c8 100644
--- a/man/read_as_markdown.Rd
+++ b/man/read_as_markdown.Rd
@@ -4,48 +4,118 @@
 \alias{read_as_markdown}
 \title{Convert files to markdown}
 \usage{
-read_as_markdown(x, ..., canonical = FALSE, main_only = TRUE)
+read_as_markdown(
+  x,
+  ...,
+  canonical = FALSE,
+  html_extract_selectors = c("main"),
+  html_zap_selectors = c("nav")
+)
 }
 \arguments{
-\item{x}{A filepath or url. Accepts a wide variety of file types, including
-PDF, PowerPoint, Word, Excel, Images (EXIF metadata and OCR), Audio (EXIF
-metadata and speech transcription), HTML, Text-based formats (CSV, JSON,
-XML), ZIP files (iterates over contents), Youtube URLs, and EPubs.}
+\item{x}{A filepath or URL. Accepts a wide variety of file types, including
+PDF, PowerPoint, Word, Excel, images (EXIF metadata and OCR), audio (EXIF
+metadata and speech transcription), HTML, text-based formats (CSV, JSON,
+XML), ZIP files (iterates over contents), YouTube URLs, and EPUBs.}
 
-\item{...}{Passed on to \code{MarkItDown.convert()}}
+\item{...}{Passed on to \code{MarkItDown.convert()}.}
 
-\item{canonical}{logical, whether to postprocess the output from MarkItDown
+\item{canonical}{Logical. Whether to postprocess the output from MarkItDown
 with \code{commonmark::markdown_commonmark()}.}
 
-\item{main_only}{logical. Applies only to HTML documents. If \code{TRUE} and a
-\code{main} tag is present in the document, only the contents of the \code{main} tag
-are returned. This is a convenient way to exclude navigational elements
-typically found in sidebars, page headers, and footers.}
+\item{html_extract_selectors}{Character vector of CSS selectors. If a match
+for a selector is found in the document, only the matched node's contents
+are converted. Unmatched extract selectors have no effect.}
+
+\item{html_zap_selectors}{Character vector of CSS selectors. Elements
+matching these selectors will be excluded ("zapped") from the HTML document
+before conversion to markdown. This is useful for removing navigation bars,
+sidebars, headers, footers, or other unwanted elements. By default,
+navigation elements (\code{nav}) are excluded.}
 }
 \value{
-A single string of markdown
+A single string of markdown.
 }
 \description{
 Convert files to markdown
 }
 \examples{
 \dontshow{if (reticulate::py_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
-# convert html
+# Convert HTML
 read_as_markdown("https://r4ds.hadley.nz/base-R.html") |>
-  substr(1, 1000) |>
+  substr(1, 500) |>
   cat()
 
 read_as_markdown("https://r4ds.hadley.nz/base-R.html", canonical = TRUE) |>
-  substr(1, 1000) |>
+  substr(1, 500) |>
   cat()
 
-# convert pdf
+# When converting HTML, you might want to omit certain elements, like
+# sidebars, headers, footers, etc. You can pass CSS selector strings
+# to either extract nodes or exclude nodes during conversion.
+#
+# The easiest way to make selectors is to use SelectorGadget:
+# https://rvest.tidyverse.org/articles/selectorgadget.html
+#
+# You can also right-click on a page and select "Inspect Element" in a
+# browser to better understand an HTML page's structure.
+#
+# For comprehensive or advanced usage of CSS selectors, consult:
+# https://www.crummy.com/software/BeautifulSoup/bs4/doc/#css-selectors-through-the-css-property
+# https://facelessuser.github.io/soupsieve/selectors/
+
+url <- "https://duckdb.org/code_of_conduct"
+# Includes the sidebar and other navigational elements
+read_as_markdown(url) |> substr(1, 500) |> writeLines()
+
+# Extract the main content
+read_as_markdown(url, html_extract_selectors = "#main_content_wrap")
+
+# Alternative approach: exclude nodes
+read_as_markdown(
+  url,
+  html_zap_selectors = c(
+    "header",          # node name
+    ".sidenavigation", # node class
+    ".searchoverlay",  # node class
+    "#sidebar"         # node ID
+  )
+) |> substr(1, 500) |> writeLines()
+
+# Quarto example
+url <- "https://quarto.org/docs/computations/python.html"
+
+# Include sidebar, footer, etc.
+read_as_markdown(
+  url,
+  html_extract_selectors = NULL,
+  html_zap_selectors = NULL
+) |> substr(1, 500) |> writeLines()
+
+# Exclude content outside <main>
+read_as_markdown(url, html_extract_selectors = "main") |>
+  substr(1, 500) |> writeLines()
+
+# Exclude specific matching nodes
+read_as_markdown(
+  url,
+  html_extract_selectors = NULL,
+  html_zap_selectors = c(
+    "#quarto-sidebar",
+    "#quarto-margin-sidebar",
+    "header",
+    "footer",
+    "nav"
+  )
+) |> substr(1, 500) |> writeLines()
+
+# Convert PDF
 pdf <- file.path(R.home("doc"), "NEWS.pdf")
 read_as_markdown(pdf) |> substr(1, 1000) |> cat()
-## alternative:
+## Alternative:
 # pdftools::pdf_text(pdf) |> substr(1, 2000) |> cat()
 
-# convert images to markdown descriptions using OpenAI
+# Convert images to markdown descriptions using OpenAI
 jpg <- file.path(R.home("doc"), "html", "logo.jpg")
 if (Sys.getenv("OPENAI_API_KEY") != "") {
   # if (xfun::is_macos()) system("brew install ffmpeg")