diff --git a/deepmd/main.py b/deepmd/main.py
index 14c0390bdc..492f3b085e 100644
--- a/deepmd/main.py
+++ b/deepmd/main.py
@@ -112,7 +112,7 @@ def main_parser() -> argparse.ArgumentParser:
     if default_backend not in BACKEND_TABLE.keys():
         raise ValueError(
             f"Unknown backend {default_backend}. "
-            "Please set DP_BACKEND to either tensorflow or pytorch."
+            "Please set DP_BACKEND to either tensorflow, pytorch, or paddle."
         )
 
     parser_backend = parser.add_mutually_exclusive_group()
@@ -312,7 +312,7 @@ def main_parser() -> argparse.ArgumentParser:
         "--output",
         type=str,
         default="frozen_model",
-        help="Filename (prefix) of the output model file. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth",
+        help="Filename (prefix) of the output model file. TensorFlow backend: suffix is .pb; PyTorch backend: suffix is .pth; Paddle backend: suffix is .json and .pdiparams",
     )
     parser_frz.add_argument(
         "-n",
diff --git a/deepmd/pd/train/training.py b/deepmd/pd/train/training.py
index ba3531bc8a..44d0d16439 100644
--- a/deepmd/pd/train/training.py
+++ b/deepmd/pd/train/training.py
@@ -54,6 +54,7 @@
 )
 from deepmd.pd.utils.env import (
     CINN,
+    CINN_ALLOW_DYNAMIC_SHAPE,
     DEFAULT_PRECISION,
     DEVICE,
     JIT,
@@ -609,49 +610,65 @@ def warm_up_linear(step, warmup_steps):
             )
 
             backend = "CINN" if CINN else None
-            # NOTE: This is a trick to decide the right input_spec for wrapper.forward
-            _, label_dict, _ = self.get_data(is_train=True)
-
-            # Define specification templates
-            spec_templates = {
-                "find_box": np.float32(1.0),
-                "find_coord": np.float32(1.0),
-                "find_numb_copy": np.float32(0.0),
-                "numb_copy": static.InputSpec([1, 1], "int64", name="numb_copy"),
-                "find_energy": np.float32(1.0),
-                "energy": static.InputSpec([1, 1], "float64", name="energy"),
-                "find_force": np.float32(1.0),
-                "force": static.InputSpec([1, -1, 3], "float64", name="force"),
-                "find_virial": np.float32(0.0),
-                "virial": static.InputSpec([1, 9], "float64", name="virial"),
-                "natoms": static.InputSpec([1, -1], "int32", name="natoms"),
-            }
-            # Build spec only for keys present in sample data
-            label_dict_spec = {
-                k: spec_templates[k] for k in label_dict.keys() if k in spec_templates
-            }
-            self.wrapper.forward = jit.to_static(
-                backend=backend,
-                input_spec=[
-                    static.InputSpec([1, -1, 3], "float64", name="coord"),  # coord
-                    static.InputSpec([1, -1], "int32", name="atype"),  # atype
-                    None,  # spin
-                    static.InputSpec([1, 9], "float64", name="box"),  # box
-                    static.InputSpec([], "float64", name="cur_lr"),  # cur_lr
-                    label_dict_spec,  # label,
-                    # None,  # task_key
-                    # False,  # inference_only
-                    # False,  # do_atomic_virial
-                    # None,  # fparam
-                    # None,  # aparam
-                ],
-                full_graph=True,
-            )(self.wrapper.forward)
+            if CINN_ALLOW_DYNAMIC_SHAPE:
+                # NOTE: This is a trick to decide the right input_spec for wrapper.forward
+                _, label_dict, _ = self.get_data(is_train=True)
+                # Define specification templates
+                spec_templates = {
+                    "find_box": np.float32(1.0),
+                    "find_coord": np.float32(1.0),
+                    "find_numb_copy": np.float32(0.0),
+                    "numb_copy": static.InputSpec([1, 1], "int64", name="numb_copy"),
+                    "find_energy": np.float32(1.0),
+                    "energy": static.InputSpec([1, 1], "float64", name="energy"),
+                    "find_force": np.float32(1.0),
+                    "force": static.InputSpec([1, -1, 3], "float64", name="force"),
+                    "find_virial": np.float32(0.0),
+                    "virial": static.InputSpec([1, 9], "float64", name="virial"),
+                    "natoms": static.InputSpec([1, -1], "int32", name="natoms"),
+                }
+                # Build spec only for keys present in sample data
+                label_dict_spec = {
+                    k: spec_templates[k]
+                    for k in label_dict.keys()
+                    if k in spec_templates
+                }
+                self.wrapper.forward = jit.to_static(
+                    backend=backend,
+                    input_spec=[
+                        static.InputSpec([1, -1, 3], "float64", name="coord"),  # coord
+                        static.InputSpec([1, -1], "int32", name="atype"),  # atype
+                        None,  # spin
+                        static.InputSpec([1, 9], "float64", name="box"),  # box
+                        static.InputSpec([], "float64", name="cur_lr"),  # cur_lr
+                        label_dict_spec,  # label,
+                        # None,  # task_key
+                        # False,  # inference_only
+                        # False,  # do_atomic_virial
+                        # None,  # fparam
+                        # None,  # aparam
+                    ],
+                    full_graph=True,
+                )(self.wrapper.forward)
+            else:
+                self.wrapper.forward = jit.to_static(full_graph=True, backend=backend)(
+                    self.wrapper.forward
+                )
             log.info(
-                "Enable CINN during training, there may be some additional "
-                "compilation time in the first traning step."
+                "[CINN] Enable CINN during training; there may be some additional "
+                "compilation time in the first training step."
             )
+            if not CINN_ALLOW_DYNAMIC_SHAPE:
+                log.info(
+                    "[CINN] Dynamic shape is disabled (CINN_ALLOW_DYNAMIC_SHAPE=0). "
+                    "Make sure the input batch shapes are fixed during training. "
+                    "This is recommended for optimal performance, e.g., as in examples/water."
+                )
+                log.info(
+                    "[CINN] If batch data from your dataset(s) has varying input shapes, consider setting "
+                    "CINN_ALLOW_DYNAMIC_SHAPE=1 to enable dynamic shape support."
+                )
 
         if dist.is_available() and dist.is_initialized():
             # DDP will guarantee the model parameters are identical across all processes
diff --git a/deepmd/pd/utils/env.py b/deepmd/pd/utils/env.py
index c6b9b4eab5..28606d0945 100644
--- a/deepmd/pd/utils/env.py
+++ b/deepmd/pd/utils/env.py
@@ -69,6 +69,14 @@ def to_bool(flag: int | bool | str) -> bool:
         "installation or recompiling with CINN enabled."
     )
 
+# NOTE: Allow the CINN compiler to optimize inputs with dynamic shapes,
+# which may lead to a slight performance decrease compared to static shapes.
+
+# If you can confirm that the shapes of the input tensors will not change,
+# you can set this to False to further enhance performance.
+# Otherwise, keep the default value (True) for better runtime compatibility.
+CINN_ALLOW_DYNAMIC_SHAPE = to_bool(os.environ.get("CINN_ALLOW_DYNAMIC_SHAPE", True))
+
 CACHE_PER_SYS = 5  # keep at most so many sets per sys in memory
 ENERGY_BIAS_TRAINABLE = True
 CUSTOM_OP_USE_JIT = to_bool(os.environ.get("CUSTOM_OP_USE_JIT", False))
@@ -199,6 +207,7 @@ def enable_prim(enable: bool = True):
 __all__ = [
     "CACHE_PER_SYS",
     "CINN",
+    "CINN_ALLOW_DYNAMIC_SHAPE",
     "CUSTOM_OP_USE_JIT",
     "DEFAULT_PRECISION",
     "DEVICE",
diff --git a/doc/train/training.md b/doc/train/training.md
index 6c8b7a5549..ca0b46c0ef 100644
--- a/doc/train/training.md
+++ b/doc/train/training.md
@@ -34,7 +34,10 @@ $ dp --pd train input.json
 
 # [experimental] training model with CINN compiler for better performance,
 # see: https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/paddle_v3_features/cinn_cn.html
+## If the shapes of the batch input data are dynamic during training (default):
 $ CINN=1 dp --pd train input.json
+## If the shapes of the batch input data are fixed during training, e.g., examples/water:
+$ CINN=1 CINN_ALLOW_DYNAMIC_SHAPE=0 dp --pd train input.json
 ```

 :::
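
For context, here is a minimal standalone sketch of the two `jit.to_static` paths that `CINN_ALLOW_DYNAMIC_SHAPE` toggles in the patch, assuming Paddle 3.x; `Net` and the simplified `to_bool` are hypothetical stand-ins for the DeepMD model wrapper and `deepmd.pd.utils.env.to_bool`, not the actual DeepMD code.

```python
import os

import paddle
from paddle import jit, nn, static


def to_bool(flag) -> bool:
    # Simplified stand-in for deepmd.pd.utils.env.to_bool.
    if isinstance(flag, str):
        return flag.strip().lower() in ("1", "true", "on", "yes")
    return bool(flag)


CINN = to_bool(os.environ.get("CINN", False))
CINN_ALLOW_DYNAMIC_SHAPE = to_bool(os.environ.get("CINN_ALLOW_DYNAMIC_SHAPE", True))
backend = "CINN" if CINN else None  # "CINN" requires a CINN-enabled Paddle build


class Net(nn.Layer):
    def forward(self, coord):
        return coord.sum(axis=-1)


net = Net()
if CINN_ALLOW_DYNAMIC_SHAPE:
    # -1 marks the atom dimension as dynamic, so one compiled graph serves
    # batches with any number of atoms.
    spec = [static.InputSpec([1, -1, 3], "float64", name="coord")]
    net.forward = jit.to_static(input_spec=spec, full_graph=True, backend=backend)(
        net.forward
    )
else:
    # Without input_spec, shapes are taken from the first call and treated
    # as static; later shape changes trigger retracing/recompilation.
    net.forward = jit.to_static(full_graph=True, backend=backend)(net.forward)

print(net.forward(paddle.ones([1, 192, 3], dtype="float64")).shape)  # [1, 192]
```

With `CINN_ALLOW_DYNAMIC_SHAPE=0`, the no-`input_spec` path compiles against the shapes of the first batch, which is why the patch recommends it only when batch shapes are fixed during training (e.g., examples/water).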