NatLabRockies · ppinchuk · May 8, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
@@ -9,6 +9,7 @@
 from .ordinance import (
     BaseTextExtractor,
     BaseParser,
+    DocSelectionMethod,
     KeywordBasedHeuristic,
     PromptBasedTextCollector,
     PromptBasedTextExtractor,

@@ -11,6 +11,7 @@
     NoOpHeuristic,
     NoOpTextCollector,
     NoOpTextExtractor,
+    DocSelectionMethod,
     PromptBasedTextCollector,
     PromptBasedTextExtractor,
     OrdinanceExtractionPlugin,
@@ -123,11 +124,26 @@ def create_schema_based_one_shot_extraction_plugin(config, tech):  # noqa: C901
               may provide a custom system prompt if you want to provide
               more specific instructions to the LLM for the structured
               data extraction step.
-            - `allow_multi_doc_extraction`: Boolean flag indicating
-              whether to allow multiple documents to be used for the
-              extraction context simultaneously. By default, ``False``,
-              which means the first document that returns some extracted
-              data will be marked as the source.
+            - `doc_selection_method`: String defining the multi-doc
+              selection option. Specifically, if multiple documents pass
+              the filter, this method determines how the documents are
+              submitted to the extraction context. Allowed options are:
+
+                - "single doc": Use the first document that returns some
+                  extracted data as the source document for the
+                  extraction context.
+                - "multi doc context": Submit text from multiple
+                  documents to the extraction context simultaneously.
+                - "multi doc all": Each document is extracted separately
+                  and the results are concatenated. This may give duplicated
+                  feature results if the same feature is mentioned in
+                  multiple documents.
+                - "multi doc mixed": Each document is extracted
+                  separately and the results are merged together at the
+                  end. In this approach, each feature is reported at
+                  most once.
+
+              By default, ``"single doc"``.
 
     tech : str
         Technology identifier to use for the plugin (e.g., "wind",
@@ -161,10 +177,27 @@ class SchemaBasedExtractionPlugin(OrdinanceExtractionPlugin):
         SCHEMA = config["schema"]
         """dict: Schema for the output of the text extraction step"""
 
-        ALLOW_MULTI_DOC_EXTRACTION = config.get(
-            "allow_multi_doc_extraction", False
+        DOC_SELECTION_METHOD = DocSelectionMethod.normalize(
+            config.get("doc_selection_method", "single doc")
         )
-        """bool: Whether to allow extraction over multiple documents"""
+        """str: Method for selecting documents for extraction context
+
+        Allowed options:
+
+            - "single doc": Use the first document that returns some
+              extracted data as the source document for the extraction
+              context.
+            - "multi doc context": Submit text from multiple documents
+              to the extraction context simultaneously.
+            - "multi doc all": Each document is extracted separately
+              and the results are concatenated. This may give duplicated
+              feature results if the same feature is mentioned in
+              multiple documents.
+            - "multi doc mixed": Each document is extracted separately
+              and the results are merged together at the end. In this
+              approach, each feature is reported at most once.
+
+        """
 
         IDENTIFIER = tech
         """str: Identifier for extraction task """