diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py index 8f5431d3ffa..e39edad9c39 100644 --- a/backends/arm/quantizer/arm_quantizer.py +++ b/backends/arm/quantizer/arm_quantizer.py @@ -72,7 +72,25 @@ def get_symmetric_quantization_config( act_qmax: int = 127, weight_qmin: int = -127, weight_qmax: int = 127, -): +) -> QuantizationConfig: + """Create symmetric quantization config for activations and weights. + + Args: + is_per_channel (bool): Whether to use per-channel quantization for + weights. + is_qat (bool): Whether the configuration targets quantization aware + training. + is_dynamic (bool): Whether to generate dynamic activation observers. + act_qmin (int): Minimum activation quantization value. + act_qmax (int): Maximum activation quantization value. + weight_qmin (int): Minimum weight quantization value. + weight_qmax (int): Maximum weight quantization value. + + Returns: + QuantizationConfig: Quantization settings for activations, weights, and + bias. + + """ extra_args: Dict[str, Any] = {"eps": 2**-12} if is_qat: if is_dynamic: @@ -169,23 +187,26 @@ def get_symmetric_a16w8_quantization_config( weight_qmin: int = -127, weight_qmax: int = 127, epsilon: float = 2**-12, -): - """ - 16A8W quantization config: 16-bit activations, 8-bit weights. +) -> QuantizationConfig: + """16A8W quantization config: 16-bit activations, 8-bit weights. This configuration provides better accuracy than 8A8W while maintaining reasonable memory usage through 8-bit weights. Args: - is_per_channel: Whether to use per-channel quantization for weights - is_qat: Whether this is for Quantization Aware Training - is_dynamic: Whether to use dynamic quantization - weight_qmin: Minimum quantization value for weights - weight_qmax: Maximum quantization value for weights - epsilon: Value used to pad observed [qmin, qmax] before initial zero point and scale calculation + is_per_channel (bool): Whether to use per-channel quantization for + weights. 
+ is_qat (bool): Whether this is for quantization aware training. + is_dynamic (bool): Whether to use dynamic quantization. + weight_qmin (int): Minimum quantization value for weights. + weight_qmax (int): Maximum quantization value for weights. + epsilon (float): Value used to pad observed [qmin, qmax] before initial + zero-point and scale calculation. Returns: - QuantizationConfig with 16-bit activations and 8-bit weights + QuantizationConfig: Configuration with 16-bit activations and 8-bit + weights. + """ extra_args: Dict[str, Any] = {"eps": epsilon} @@ -244,27 +265,39 @@ def get_symmetric_a16w8_quantization_config( NodeFilterType = Callable[[Node], bool] -"""Type for a Node Filter used by annotators. A Node filter is a function that takes - a Node and returns whether the node should be annotated or not. +"""Type for a Node Filter used by annotators. + +A Node filter is a function that takes a Node and returns whether the node +should be annotated or not. + """ def _get_module_type_filter(tp: Callable) -> NodeFilterType: - """Get the module_type_filter function for a given module type, the filter accepts - a node and checks if the node comes from a module that has certain module type + """Get the module_type_filter function for a given module type. - For example: - node: linear_op = call_function[...](...) # comes from a module with type Block -> Sub -> Linear + The filter accepts a node and checks if the node comes from a module that + has a certain module type. + + Args: + tp (Callable): Module class to match against the graph node metadata. + + Returns: + NodeFilterType: Predicate that returns True for nodes from the module + type. + For example: + node: linear_op = call_function[...](...) 
# type Block -> Sub -> Linear - >> module_type_filter = _get_module_type_filter(Sub) # submodule with type `Sub`, under the `Block` submodule + >> module_type_filter = _get_module_type_filter(Sub) >> print(module_type_filter(node)) - True # the node is from the submodule `Sub` (same for `Block` and `Linear` as well) - """ + True # the node is from the submodule `Sub` (same for `Block` and `Linear`) + """ tp_str = tp.__module__ + "." + tp.__qualname__ def module_type_filter(n: Node) -> bool: + """Return True if the node originates from the target module type.""" # node_stack example: { # 'L__self___sub': ("L['self'].sub", ), # 'L__self___sub_linear': ("L['self'].sub.linear", ) @@ -279,16 +312,29 @@ def module_type_filter(n: Node) -> bool: def _get_not_module_type_or_name_filter( tp_list: List[Callable], module_name_list: List[str] ) -> NodeFilterType: + """Create a filter that excludes provided module types and names. + + Args: + tp_list (List[Callable]): Module types to exclude from annotation. + module_name_list (List[str]): Module names to exclude from annotation. + + Returns: + NodeFilterType: Filter that returns True when the node does not match + any provided module type or name. 
+ + """ module_type_filters = [_get_module_type_filter(tp) for tp in tp_list] module_name_list_filters = [get_module_name_filter(m) for m in module_name_list] def not_module_type_or_name_filter(n: Node) -> bool: + """Return True when the node matches none of the blocked filters.""" return not any(f(n) for f in module_type_filters + module_name_list_filters) return not_module_type_or_name_filter class TOSAQuantizer(Quantizer): + """Manage quantization annotations for TOSA-compatible backends.""" def __init__( self, compile_spec_or_tosa_spec: TosaSpecification | ArmCompileSpec @@ -314,11 +360,12 @@ def __init__( self.module_name_config: Dict[str, Optional[QuantizationConfig]] = {} def set_global(self, quantization_config: QuantizationConfig) -> TOSAQuantizer: - """ - Set quantization_config for submodules that are not already annotated by name or type filters. + """Set quantization_config for submodules not matched by other filters. Args: - quantization_config: The QuantizationConfig to set as global configuration. + quantization_config (QuantizationConfig): Configuration to apply to + modules that are not captured by name or type filters. + """ self.global_config = quantization_config return self @@ -326,14 +373,17 @@ def set_global(self, quantization_config: QuantizationConfig) -> TOSAQuantizer: def set_module_type( self, module_type: Callable, quantization_config: QuantizationConfig ) -> TOSAQuantizer: - """ - Set quantization_config for a submodule with type: `module_type`, for example: - quantizer.set_module_name(Sub) or quantizer.set_module_name(nn.Linear), it will quantize all supported operator/operator - patterns in the submodule with this module type with the given `quantization_config`. + """Set quantization_config for submodules with a given module type. + + For example, calling set_module_type(Sub) quantizes supported patterns + in each Sub instance with the provided quantization_config. 
Args: - module_type: The type of the submodule to set the quantization config for. - quantization_config: The QuantizationConfig to set for the submodule. + module_type (Callable): Type whose submodules should use the + provided quantization configuration. + quantization_config (QuantizationConfig): Configuration to apply to + submodules of the given type. + """ self.module_type_config[module_type] = quantization_config return self @@ -341,14 +391,16 @@ def set_module_type( def set_module_name( self, module_name: str, quantization_config: Optional[QuantizationConfig] ) -> TOSAQuantizer: - """ - Set quantization_config for a submodule with name: `module_name`, for example: - quantizer.set_module_name("blocks.sub"), it will quantize all supported operator/operator - patterns in the submodule with this module name with the given `quantization_config` + """Set quantization_config for submodules with a given module name. + + For example, calling set_module_name("blocks.sub") quantizes supported + patterns for that submodule with the provided quantization_config. Args: - module_name: The name of the submodule to set the quantization config for. - quantization_config: The QuantizationConfig to set for the submodule. + module_name (str): Fully qualified module name to configure. + quantization_config (QuantizationConfig): Configuration applied to + the named submodule. + """ # Validate that quantization_config is provided if quantization_config is None: @@ -357,26 +409,28 @@ def set_module_name( return self def set_io(self, quantization_config: QuantizationConfig) -> TOSAQuantizer: - """ - Set quantization_config for input and output nodes. + """Set quantization_config for input and output nodes. Args: - quantization_config: The QuantizationConfig to set for input and output nodes. + quantization_config (QuantizationConfig): Configuration describing + activation quantization for model inputs and outputs. 
+ """ self.io_config = quantization_config return self def transform_for_annotation(self, model: GraphModule) -> GraphModule: - """ - An initial pass for transforming the graph to prepare it for annotation. + """Transform the graph to prepare it for quantization annotation. + Currently transforms scalar values to tensor attributes. Args: - model: The model to transform. + model (GraphModule): Model whose graph will be transformed. + Returns: - The transformed model. - """ + GraphModule: Transformed model prepared for annotation. + """ # TODO: Fix the need to lazily import this. from executorch.backends.arm._passes import ArmPassManager @@ -385,12 +439,16 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule: ) def annotate(self, model: GraphModule) -> GraphModule: - """Performs the quantization annotation on the graph. - Currently only does static quantization annotation. + """Annotate the graph with the configured quantization settings. + + Currently only does static quantization annotation. + Args: - model: The model to annotate statically. + model (GraphModule): Model to annotate statically. + Returns: - The annotated model. + GraphModule: Annotated model ready for export. + """ model = self._annotate_for_static_quantization_config(model) return model @@ -401,14 +459,19 @@ def _annotate_all_static_patterns( quantization_config: Optional[QuantizationConfig], filter_fn: Optional[Callable[[Node], bool]] = None, ) -> GraphModule: - """Loops over all STATIC_OPS and runs the corresponding registered annotator. + """Annotate all static patterns registered for the backend. + Args: - model: The model to annotate statically. - quantization_config: Specifies the QuantizationSpecs for the model's - input activations, output activations, weights and biases. - filter_fn: An optional filter function that takes a node and returns whether the node should be annotated. + model (GraphModule): Model to annotate statically. 
+ quantization_config (Optional[QuantizationConfig]): Quantization + specs for input activations, output activations, weights, and + biases. + filter_fn (Optional[Callable[[Node], bool]]): Optional node filter + specifying which nodes to annotate. + Returns: - The annotated model. + GraphModule: Model populated with quantization annotations. + """ # TODO: implement the support for None to be canceling out previous annotations if quantization_config is None: @@ -420,8 +483,15 @@ def _annotate_all_static_patterns( def _annotate_for_static_quantization_config( self, model: GraphModule ) -> GraphModule: - """Matches the correct QuantizationConfig with the correct module using a filter - when running _annotate_all_static_patterns. + """Match QuantizationConfigs to modules before annotating patterns. + + Args: + model (GraphModule): Model whose modules are being matched to + quantization configs. + + Returns: + GraphModule: Annotated model after applying configured filters. + """ if self.io_config: self._annotate_io(model, self.io_config) @@ -451,6 +521,14 @@ def _annotate_io( model: GraphModule, quantization_config: QuantizationConfig, ): + """Annotate graph inputs and outputs with the provided configuration. + + Args: + model (GraphModule): GraphModule being annotated. + quantization_config (QuantizationConfig): Activation qspecs to apply + to IO nodes. + + """ for node in model.graph.nodes: if is_annotated(node): continue @@ -468,6 +546,7 @@ def _annotate_io( mark_node_as_annotated(node) def validate(self, model: GraphModule) -> None: + """TODO: Implement validation of annotated graph for TOSA backend.""" pass def quantize_with_submodules( @@ -479,10 +558,16 @@ def quantize_with_submodules( """Quantizes a GraphModule in a way such that conditional submodules are handled properly. Args: - model: GraphModule, the model to quantize. - calibration_samples: list[tuple], a list of inputs to used to calibrate the model during quantization. 
-        To properly calibrate a model with submodules, at least one sample per code path is needed.
-        is_qat: bool, whether to do quantization aware training or not.
+        model (GraphModule): The model to quantize.
+        calibration_samples (list[tuple]): A list of inputs used to
+            calibrate the model during quantization. To properly calibrate a
+            model with submodules, at least one sample per code path is
+            needed.
+        is_qat (bool): Whether to do quantization aware training or not.
+
+    Returns:
+        GraphModule: The quantized model.
+
     """
     prepare_fn = prepare_qat_pt2e if is_qat else prepare_pt2e
@@ -499,11 +584,12 @@ def quantize_with_submodules(


 class EthosUQuantizer(TOSAQuantizer):
-    """
-    Quantizer supported by the Arm Ethos-U backend.
+    """Quantizer supported by the Arm Ethos-U backend.

     Args:
-        compile_spec: A EthosUCompileSpec instance.
+        compile_spec (EthosUCompileSpec): Backend compile specification for
+            Ethos-U targets.
+
     """

     def __init__(self, compile_spec: EthosUCompileSpec) -> None:
@@ -511,11 +597,12 @@ def __init__(self, compile_spec: EthosUCompileSpec) -> None:

 class VgfQuantizer(TOSAQuantizer):
-    """
-    Quantizer supported by the Arm Vgf backend.
+    """Quantizer supported by the Arm Vgf backend.

     Args:
-        compile_spec: A VgfCompileSpec instance.
+        compile_spec (VgfCompileSpec): Backend compile specification for Vgf
+            targets.
+
     """

     def __init__(self, compile_spec: VgfCompileSpec) -> None:
diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
index 80ac51ad644..3a8700a63e7 100644
--- a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
+++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
@@ -19,15 +19,33 @@ class EthosUQuantizer(compile_spec: 'EthosUCompileSpec') -> 'None'
 Quantizer supported by the Arm Ethos-U backend.

 Args:
-- **compile_spec**: A EthosUCompileSpec instance.
+- **compile_spec (EthosUCompileSpec)**: Backend compile specification for
+  Ethos-U targets.
+
+```python
+def EthosUQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False):
+```
+Quantizes a GraphModule in a way such that conditional submodules are handled properly.
+
+Args:
+- **model (GraphModule)**: The model to quantize.
+- **calibration_samples (list[tuple])**: A list of inputs used to
+  calibrate the model during quantization. To properly calibrate a
+  model with submodules, at least one sample per code path is
+  needed.
+- **is_qat (bool)**: Whether to do quantization aware training or not.
+
+Returns:
+- **GraphModule**: The quantized model.

 ```python
 def EthosUQuantizer.set_global(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
 ```
-Set quantization_config for submodules that are not already annotated by name or type filters.
+Set quantization_config for submodules not matched by other filters.

 Args:
-- **quantization_config**: The QuantizationConfig to set as global configuration.
+- **quantization_config (QuantizationConfig)**: Configuration to apply to
+  modules that are not captured by name or type filters.

 ```python
 def EthosUQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
 ```
 Set quantization_config for input and output nodes.

 Args:
-- **quantization_config**: The QuantizationConfig to set for input and output nodes.
+- **quantization_config (QuantizationConfig)**: Configuration describing
+  activation quantization for model inputs and outputs.
```python def EthosUQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer': ``` -Set quantization_config for a submodule with name: `module_name`, for example: -quantizer.set_module_name("blocks.sub"), it will quantize all supported operator/operator -patterns in the submodule with this module name with the given `quantization_config` +Set quantization_config for submodules with a given module name. + +For example, calling set_module_name("blocks.sub") quantizes supported +patterns for that submodule with the provided quantization_config. Args: -- **module_name**: The name of the submodule to set the quantization config for. -- **quantization_config**: The QuantizationConfig to set for the submodule. +- **module_name (str)**: Fully qualified module name to configure. +- **quantization_config (QuantizationConfig)**: Configuration to apply to + the named submodule. ```python def EthosUQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer': ``` -Set quantization_config for a submodule with type: `module_type`, for example: -quantizer.set_module_name(Sub) or quantizer.set_module_name(nn.Linear), it will quantize all supported operator/operator -patterns in the submodule with this module type with the given `quantization_config`. +Set quantization_config for submodules with a given module type. + +For example, calling set_module_type(Sub) quantizes supported patterns +in each Sub instance with the provided quantization_config. Args: -- **module_type**: The type of the submodule to set the quantization config for. -- **quantization_config**: The QuantizationConfig to set for the submodule. +- **module_type (Callable)**: Type whose submodules should use the + provided quantization configuration. +- **quantization_config (QuantizationConfig)**: Configuration to apply to + submodules of the given type. 
 ```python
 def EthosUQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
 ```
-An initial pass for transforming the graph to prepare it for annotation.
+Transform the graph to prepare it for quantization annotation.
+
 Currently transforms scalar values to tensor attributes.

 Args:
-- **model**: The model to transform.
+- **model (GraphModule)**: Model whose graph will be transformed.
+
 Returns:
- The transformed model.
+- **GraphModule**: Transformed model prepared for annotation.
diff --git a/docs/source/backends/arm-vgf/arm-vgf-quantization.md b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
index 68f77249885..23f3246eb6b 100644
--- a/docs/source/backends/arm-vgf/arm-vgf-quantization.md
+++ b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
@@ -21,15 +21,33 @@ class VgfQuantizer(compile_spec: 'VgfCompileSpec') -> 'None'
 Quantizer supported by the Arm Vgf backend.

 Args:
-- **compile_spec**: A VgfCompileSpec instance.
+- **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf
+  targets.
+
+```python
+def VgfQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False):
+```
+Quantizes a GraphModule in a way such that conditional submodules are handled properly.
+
+Args:
+- **model (GraphModule)**: The model to quantize.
+- **calibration_samples (list[tuple])**: A list of inputs used to
+  calibrate the model during quantization. To properly calibrate a
+  model with submodules, at least one sample per code path is
+  needed.
+- **is_qat (bool)**: Whether to do quantization aware training or not.
+
+Returns:
+- **GraphModule**: The quantized model.

 ```python
 def VgfQuantizer.set_global(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
 ```
-Set quantization_config for submodules that are not already annotated by name or type filters.
+Set quantization_config for submodules not matched by other filters.
Args: -- **quantization_config**: The QuantizationConfig to set as global configuration. +- **quantization_config (QuantizationConfig)**: Configuration to apply to + modules that are not captured by name or type filters. ```python def VgfQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer': @@ -37,37 +55,45 @@ def VgfQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOS Set quantization_config for input and output nodes. Args: -- **quantization_config**: The QuantizationConfig to set for input and output nodes. +- **quantization_config (QuantizationConfig)**: Configuration describing + activation quantization for model inputs and outputs. ```python def VgfQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer': ``` -Set quantization_config for a submodule with name: `module_name`, for example: -quantizer.set_module_name("blocks.sub"), it will quantize all supported operator/operator -patterns in the submodule with this module name with the given `quantization_config` +Set quantization_config for submodules with a given module name. + +For example, calling set_module_name("blocks.sub") quantizes supported +patterns for that submodule with the provided quantization_config. Args: -- **module_name**: The name of the submodule to set the quantization config for. -- **quantization_config**: The QuantizationConfig to set for the submodule. +- **module_name (str)**: Fully qualified module name to configure. +- **quantization_config (QuantizationConfig)**: Configuration to apply to + the named submodule. 
```python def VgfQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer': ``` -Set quantization_config for a submodule with type: `module_type`, for example: -quantizer.set_module_name(Sub) or quantizer.set_module_name(nn.Linear), it will quantize all supported operator/operator -patterns in the submodule with this module type with the given `quantization_config`. +Set quantization_config for submodules with a given module type. + +For example, calling set_module_type(Sub) quantizes supported patterns +in each Sub instance with the provided quantization_config. Args: -- **module_type**: The type of the submodule to set the quantization config for. -- **quantization_config**: The QuantizationConfig to set for the submodule. +- **module_type (Callable)**: Type whose submodules should use the + provided quantization configuration. +- **quantization_config (QuantizationConfig)**: Configuration to apply to + submodules of the given type. ```python def VgfQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule': ``` -An initial pass for transforming the graph to prepare it for annotation. +Transform the graph to prepare it for quantization annotation. + Currently transforms scalar values to tensor attributes. Args: -- **model**: The model to transform. +- **model (GraphModule)**: Model whose graph will be transformed. + Returns: - The transformed model. +- **GraphModule**: Transformed model prepared for annotation.