PrimeIntellect-ai · willccbb · May 14, 2026 · May 14, 2026
diff --git a/.semgrep/verifiers.yml b/.semgrep/verifiers.yml
@@ -5,10 +5,19 @@ rules:
     message: Do not use `from __future__ import annotations`; quote only the specific forward references that need it.
     pattern: from __future__ import annotations
 
+  - id: verifiers-no-skip-validation
+    languages: [python]
+    severity: ERROR
+    message: SkipValidation hides type errors; use precise types and validators, or add a narrow nosemgrep waiver with a protocol-boundary rationale.
+    pattern-either:
+      - pattern: SkipValidation[$TYPE]
+      - pattern: $P.SkipValidation[$TYPE]
+      - pattern: from pydantic import SkipValidation
+
   - id: verifiers-v1-config-param-one-type
     languages: [python]
     severity: ERROR
-    message: Public v1 `config` parameters must be one concrete config type or `None`; keep raw mappings at explicit config-loader boundaries.
+    message: Public v1 `config` parameters must be one concrete config type; keep raw mappings at explicit config-loader boundaries.
     paths:
       include:
         - /verifiers/v1/**/*.py
@@ -24,6 +33,27 @@ rules:
           metavariable: $ANNOT
           regex: "(Any|ConfigMap|Mapping\\[str, object\\]|dict\\[str, object\\]|.*\\|.*\\|.*)"
 
+  - id: verifiers-v1-loaders-require-config
+    languages: [python]
+    severity: ERROR
+    message: Public `load_taskset` and `load_harness` loaders must require one concrete config object; defaults are supplied by the typed EnvConfig.
+    paths:
+      include:
+        - /environments/**/*.py
+    pattern-either:
+      - pattern: |
+          def load_taskset(..., config: $CONFIG | None = None, ...):
+              ...
+      - pattern: |
+          def load_harness(..., config: $CONFIG | None = None, ...):
+              ...
+      - pattern: |
+          def load_taskset(..., config: Optional[$CONFIG] = None, ...):
+              ...
+      - pattern: |
+          def load_harness(..., config: Optional[$CONFIG] = None, ...):
+              ...
+
   - id: verifiers-no-private-framework-classes
     languages: [python]
     severity: ERROR

diff --git a/README.md b/README.md
@@ -99,6 +99,10 @@ Environments built with Verifiers are self-contained Python modules. To initiali
 ```bash
 prime env init my-env # creates a new template in ./environments/my_env
 ```
+Add an explicit harness loader when the environment owns harness behavior:
+```bash
+prime env init my-env --with-harness
+```
 For OpenEnv integration, use:
 ```bash
 prime env init my-openenv --openenv
@@ -116,7 +120,9 @@ environments/my_env/
 └── README.md           # Documentation
 ```
 
-Environment modules should expose a `load_environment` function which returns an instance of the Environment object, and which can accept custom arguments. For example: 
+Environment modules should expose a `load_environment` function which returns an
+environment object. For simple legacy environments, it can still call an
+environment constructor directly:
 ```python
 # my_env.py
 import verifiers as vf
@@ -148,7 +154,7 @@ def source():
 async def contains_answer(task, state) -> float:
     return float(task["answer"] in str(state.get("completion") or ""))
 
-def load_taskset(config: vf.TasksetConfig | None = None):
+def load_taskset(config: vf.TasksetConfig):
     return vf.Taskset(source=source, rewards=[contains_answer], config=config)
 
 def load_environment(config: vf.EnvConfig) -> vf.Env:
@@ -169,8 +175,8 @@ env = vf.Env(
 ```
 
 The same environment package is the unit used by evals and `prime-rl`. The
-trainer owns model, endpoint, sampling, and rollout count; v1-specific taskset
-and harness options stay under `env.taskset` and `env.harness`:
+trainer owns model, endpoint, sampling, and rollout count; v1-specific options
+stay on the taskset or harness config that owns them:
 
 ```toml
 # configs/rl/my-v1-env.toml
@@ -185,12 +191,12 @@ max_tokens = 4096
 [[env]]
 id = "my-env"
 
-[env.args]
-arg1 = "non-th-arg"
-
 [env.harness]
 max_turns = 1
 
+[env.taskset]
+split = "train"
+
 [env.taskset.scoring.contains_answer]
 weight = 1.0
 ```

diff --git a/assets/lab/environments/AGENTS.md b/assets/lab/environments/AGENTS.md
@@ -693,14 +693,18 @@ environments/my_env/
 └── README.md          # documentation template
 ```
 
-The environment file must export a `load_environment()` function that returns a `vf.Environment`. Explicitly declare any arguments your environment accepts:
+The environment file exports a taskset-first v1 loader:
 
 ```python
 import verifiers as vf
 
-def load_environment(difficulty: str = "easy", num_examples: int = -1) -> vf.Environment:
-    # build dataset, rubric, etc.
-    return vf.SingleTurnEnv(dataset=dataset, rubric=rubric)
+
+def load_taskset(config: vf.TasksetConfig) -> vf.Taskset:
+    return vf.Taskset(source=source, rewards=[reward_fn], config=config)
+
+
+def load_environment(config: vf.EnvConfig) -> vf.Env:
+    return vf.Env(taskset=load_taskset(config=config.taskset))
 ```
 
 ### pyproject.toml

diff --git a/docs/byo-harness.md b/docs/byo-harness.md
@@ -50,7 +50,7 @@ async def contains_answer(task, state) -> float:
     return float(task["answer"] in str(state.get("completion") or ""))
 
 
-def load_taskset(config: vf.TasksetConfig | None = None):
+def load_taskset(config: vf.TasksetConfig):
     return vf.Taskset(source=source, rewards=[contains_answer], config=config)
 
 
@@ -74,8 +74,7 @@ class GSM8KTasksetConfig(vf.TasksetConfig):
     split: str = "train"
 
 
-def load_taskset(config: vf.TasksetConfig | None = None):
-    config = GSM8KTasksetConfig(config)
+def load_taskset(config: GSM8KTasksetConfig):
     dataset_name = config.dataset_name
     split = config.split
 
@@ -273,7 +272,7 @@ Create a harness when rollout behavior is no longer just "call the model with
 the resolved taskset tools."
 
 ```python
-def load_harness(config: vf.HarnessConfig | None = None):
+def load_harness(config: vf.HarnessConfig):
     return vf.Harness(
         program={"fn": "my_env.program:run"},
         config=config,
@@ -422,8 +421,7 @@ def load_environment(config: vf.EnvConfig):
     )
 ```
 
-Eval config passes named environment args through `args` and v1 config through
-the `taskset`/`harness` sections:
+Eval config passes v1 config through the `taskset`/`harness` sections:
 
 ```toml
 model = "openai/gpt-5.4-mini"
@@ -441,47 +439,32 @@ max_turns = 4
 weight = 0.5
 ```
 
-For concise named args, define one typed args object and pass it as `args`.
-`EnvConfig.args` is intentionally user-defined; environment packages decide how
-those args flow into taskset and harness construction.
+For environment-specific settings, define leaf fields on the taskset or harness
+config that owns them. An `EnvConfig` subclass only binds the concrete taskset
+and harness config types for the loader.
 
 ```python
-class MyEnvArgsConfig(vf.Config):
+class MyTasksetConfig(vf.TasksetConfig):
     split: str = "train"
-    max_turns: int = 10
 
 
-class MyTasksetConfig(vf.TasksetConfig):
-    split: str = "train"
+class MyEnvConfig(vf.EnvConfig):
+    taskset: MyTasksetConfig
+    harness: vf.HarnessConfig
 
 
-def load_taskset(config: vf.TasksetConfig | None = None):
-    config = MyTasksetConfig(config)
+def load_taskset(config: MyTasksetConfig):
     ...
 
 
-def load_harness(config: vf.HarnessConfig | None = None):
-    config = vf.HarnessConfig(config)
+def load_harness(config: vf.HarnessConfig):
     ...
 
 
-def load_environment(
-    config: vf.EnvConfig,
-    split: str = "train",
-    max_turns: int = 10,
-):
-    config = vf.EnvConfig(
-        config,
-        args=MyEnvArgsConfig(split=split, max_turns=max_turns),
-    )
-    args = MyEnvArgsConfig(config.args)
+def load_environment(config: MyEnvConfig):
     return vf.Env(
-        taskset=load_taskset(
-            config=MyTasksetConfig(config.taskset, split=args.split)
-        ),
-        harness=load_harness(
-            config=vf.HarnessConfig(config.harness, max_turns=args.max_turns)
-        ),
+        taskset=load_taskset(config=config.taskset),
+        harness=load_harness(config=config.harness),
     )
 ```
 
@@ -499,12 +482,12 @@ max_tokens = 4096
 [[env]]
 id = "primeintellect/my-v1-env"
 
-[env.args]
-arg1 = "non-th-arg"
-
 [env.harness]
 max_turns = 8
 
+[env.taskset]
+split = "train"
+
 [env.taskset.toolsets.search]
 tools = ["my_env.tools:search"]
 bindings = { "search.index" = "objects.index" }

diff --git a/docs/development.md b/docs/development.md
@@ -257,6 +257,9 @@ uv run pytest tests/test_file.py::test_name -vvs --pdb
 # Initialize template
 prime env init my-environment
 
+# Include an explicit harness loader when needed
+prime env init my-environment --with-harness
+
 # Install locally for testing
 prime env install my-environment
 
@@ -317,7 +320,7 @@ uv run ruff format --check verifiers tests  # Verify Python formatting
 uv run ty check verifiers             # Type check (matches CI Ty target)
 
 # Environment tools
-prime env init new-env                       # Create environment
+prime env init new-env                       # Create taskset-first v1 environment
 prime env install new-env                    # Install environment
 prime eval run new-env -m openai/gpt-4.1-mini -n 5  # Test environment
 prime eval tui                               # Browse evals in the tree browser

diff --git a/docs/environments.md b/docs/environments.md
@@ -687,14 +687,18 @@ environments/my_env/
 └── README.md          # documentation template
 ```
 
-The environment file must export a `load_environment()` function that returns a `vf.Environment`. Explicitly declare any arguments your environment accepts:
+The environment file exports a taskset-first v1 loader:
 
 ```python
 import verifiers as vf
 
-def load_environment(difficulty: str = "easy", num_examples: int = -1) -> vf.Environment:
-    # build dataset, rubric, etc.
-    return vf.SingleTurnEnv(dataset=dataset, rubric=rubric)
+
+def load_taskset(config: vf.TasksetConfig) -> vf.Taskset:
+    return vf.Taskset(source=source, rewards=[reward_fn], config=config)
+
+
+def load_environment(config: vf.EnvConfig) -> vf.Env:
+    return vf.Env(taskset=load_taskset(config=config.taskset))
 ```
 
 ### pyproject.toml

diff --git a/docs/evaluation.md b/docs/evaluation.md
@@ -68,7 +68,8 @@ The positional argument accepts two formats:
 
 Environment IDs are converted to Python module names (`my-env` → `my_env`) and imported. Modules must be installed (via `prime env install` or `uv pip install`).
 
-The `--env-args` flag passes arguments to your `load_environment()` function:
+For legacy or direct-constructor environments, the `--env-args` flag passes
+arguments to your `load_environment()` function:
 
 ```bash
 prime eval run my-env -a '{"difficulty": "hard", "num_examples": 100}'
@@ -371,15 +372,15 @@ optional:
 |-------|------|-------------|
 | `id` | string | **Required.** Environment module name |
 | `args` | table | Arguments passed to `load_environment()` |
-| `taskset` | table | v1 taskset config passed through `config.taskset` |
-| `harness` | table | v1 harness config passed through `config.harness` |
+| `taskset` | table | v1 taskset config passed through `EnvConfig.taskset` |
+| `harness` | table | v1 harness config passed through `EnvConfig.harness` |
 | `num_examples` | integer | Number of dataset examples to evaluate |
 | `rollouts_per_example` | integer | Rollouts per example |
 | `extra_env_kwargs` | table | Arguments passed to environment constructor |
 | `model` | string | Model to evaluate |
 | `endpoint_id` | string | Endpoint registry id (requires TOML `endpoints_path`) |
 
-Example with environment args:
+Example with legacy environment args:
 
 ```toml
 [[eval]]

diff --git a/docs/overview.md b/docs/overview.md
@@ -52,6 +52,10 @@ Environments built with Verifiers are self-contained Python modules. To initiali
 ```bash
 prime env init my-env # creates a new template in ./environments/my_env
 ```
+Add an explicit harness loader when the environment owns harness behavior:
+```bash
+prime env init my-env --with-harness
+```
 
 This will create a new module called `my_env` with a basic environment template.
 ```
@@ -61,7 +65,9 @@ environments/my_env/
 └── README.md           # Documentation
 ```
 
-Environment modules should expose a `load_environment` function which returns an instance of the Environment object, and which can accept custom arguments. For example: 
+Environment modules should expose a `load_environment` function which returns an
+environment object. For simple legacy environments, it can still call an
+environment constructor directly:
 ```python
 # my_env.py
 import verifiers as vf
@@ -93,7 +99,7 @@ def source():
 async def contains_answer(task, state) -> float:
     return float(task["answer"] in str(state.get("completion") or ""))
 
-def load_taskset(config: vf.TasksetConfig | None = None):
+def load_taskset(config: vf.TasksetConfig):
     return vf.Taskset(source=source, rewards=[contains_answer], config=config)
 
 def load_environment(config: vf.EnvConfig) -> vf.Env:

diff --git a/docs/reference.md b/docs/reference.md
@@ -1009,9 +1009,8 @@ class Config(BaseModel):
     ) -> Self: ...
 
 class EnvConfig(Config):
-    args: object | None = None
-    taskset: object | None = None
-    harness: object | None = None
+    taskset: TasksetConfig
+    harness: HarnessConfig
 
 class TasksetConfig(Config):
     taskset_id: str | None = None
@@ -1030,8 +1029,13 @@ class HarnessConfig(Config):
 ```
 
 `EnvConfig` is the typed v1 loader envelope. TOML `[env.taskset]` and
-`[env.harness]` sections flow to `config.taskset` and `config.harness`;
-environment-specific named args flow through `[env.args]`.
+`[env.harness]` sections populate `EnvConfig.taskset` and `EnvConfig.harness`.
+Environment-specific fields belong on the taskset or harness config that owns
+them; `EnvConfig` subclasses only bind concrete child config types.
+`taskset` must be typed as a `TasksetConfig` subclass, and `harness` must be
+typed as a `HarnessConfig` subclass.
+Annotation-only `Config` fields on `Config` subclasses default to an instance
+of their config class, so nested config objects do not need `Field(default_factory=...)`.
 
 `Config` subclasses accept a positional source config plus direct keyword
 overrides. The source object is positional-only so subclasses can define a real

diff --git a/docs/training.md b/docs/training.md
@@ -90,12 +90,12 @@ max_tokens = 4096
 [[env]]
 id = "primeintellect/my-v1-env"
 
-[env.args]
-arg1 = "non-th-arg"
-
 [env.harness]
 max_turns = 8
 
+[env.taskset]
+split = "train"
+
 [env.taskset.toolsets.search]
 tools = ["my_env.tools:search"]
 bindings = { "search.index" = "objects.index" }