Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions ads/opctl/operator/lowcode/anomaly/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@
class SupportedModels(str, metaclass=ExtendedEnumMeta):
    """Identifiers of the anomaly models a user can select.

    Each member's value is the lowercase string that is written in the
    operator configuration to choose that model.
    """

    AutoMLX = "automlx"
    AutoTS = "autots"
    Auto = "auto"
    # TODS = "tods"

    IQR = "iqr"
    LOF = "lof"
    ZSCORE = "zscore"
    ROLLING_ZSCORE = "rolling_zscore"
    MAD = "mad"
    EE = "ee"
    ISOLATIONFOREST = "isolationforest"

class NonTimeADSupportedModels(str, metaclass=ExtendedEnumMeta):
"""Supported non time-based anomaly detection models."""
Expand Down
65 changes: 30 additions & 35 deletions ads/opctl/operator/lowcode/anomaly/model/autots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,80 +4,75 @@
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import pandas as pd

from ads.common.decorator.runtime_dependency import runtime_dependency

from .base_model import AnomalyOperatorBaseModel
from .anomaly_dataset import AnomalyOutput
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
from .anomaly_dataset import AnomalyOutput
from .base_model import AnomalyOperatorBaseModel
from ..const import SupportedModels
from ads.opctl import logger


class AutoTSOperatorModel(AnomalyOperatorBaseModel):
"""Class representing AutoTS Anomaly Detection operator model."""
# Translates the operator's model name (the value of ``spec.model``) into the
# ``method`` name that is handed to autots' ``AnomalyDetector``.
model_mapping = dict(
    isolationforest="IsolationForest",
    lof="LOF",
    ee="EE",
    zscore="zscore",
    rolling_zscore="rolling_zscore",
    mad="mad",
    minmax="minmax",
    iqr="IQR",
)

@runtime_dependency(
    module="autots",
    err_msg=(
        "Please run `pip3 install autots` to "
        "install the required dependencies for AutoTS."
    ),
)
def _build_model(self) -> AnomalyOutput:
    """Run autots' ``AnomalyDetector`` over every dataset and collect the results.

    The detection method is derived from ``self.spec.model``; the ``autots``
    model name is treated as an alias for the isolation forest method.

    Returns
    -------
    AnomalyOutput
        One anomaly frame and one score frame per target, as registered
        through ``AnomalyOutput.add_output``.

    Raises
    ------
    ValueError
        If ``contamination`` is set for a method that does not accept it.
    NotImplementedError
        If the detector returns more than one anomaly column for a target.
    """
    from autots.evaluator.anomaly_detector import AnomalyDetector

    # "autots" does not name a detection method itself, so it falls back to
    # the isolation forest detector.
    method = (
        SupportedModels.ISOLATIONFOREST
        if self.spec.model == SupportedModels.AutoTS
        else self.spec.model
    )

    # NOTE(review): only ``transform_dict`` and ``output`` are read from
    # ``model_kwargs``; any other key supplied by the user is not forwarded.
    model_params = {
        "method": self.model_mapping[method],
        "transform_dict": self.spec.model_kwargs.get("transform_dict", {}),
        # ``or`` also covers an explicit ``output: null`` in the config.
        "output": self.spec.model_kwargs.get("output") or "univariate",
        "method_params": {},
    }

    # Supported methods with contamination param
    contamination_methods = (
        SupportedModels.ISOLATIONFOREST,
        SupportedModels.LOF,
        SupportedModels.EE,
    )
    if method in contamination_methods:
        model_params["method_params"]["contamination"] = (
            self.spec.contamination or 0.01
        )
    elif self.spec.contamination:
        raise ValueError(
            f'The contamination parameter is not supported for the selected model "{method}"'
        )
    logger.info(f"model params: {model_params}")

    model = AnomalyDetector(**model_params)

    date_column = self.spec.datetime_column.name
    anomaly_output = AnomalyOutput(date_column=date_column)

    for target, df in self.datasets.full_data_dict.items():
        data = df.set_index(date_column)
        anomaly, score = model.detect(data)

        if len(anomaly.columns) != 1:
            raise NotImplementedError(
                "Multi-Output Anomaly Detection is not yet supported in autots"
            )

        # Rename the single score column, then report ``1 - score``.
        score.rename(
            columns={score.columns.values[0]: OutputColumns.SCORE_COL},
            inplace=True,
        )
        score = 1 - score
        score = score.reset_index(drop=False)

        # Recode the detector's labels: 1 becomes 0 and -1 becomes 1.
        col = anomaly.columns.values[0]
        anomaly[col] = anomaly[col].replace({1: 0, -1: 1})
        anomaly.rename(columns={col: OutputColumns.ANOMALY_COL}, inplace=True)
        anomaly = anomaly.reset_index(drop=False)

        anomaly_output.add_output(target, anomaly, score)

    return anomaly_output

def _generate_report(self):
Expand Down
17 changes: 9 additions & 8 deletions ads/opctl/operator/lowcode/anomaly/model/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from ads.opctl.operator.lowcode.anomaly.utils import select_auto_model

from ..const import NonTimeADSupportedModels, SupportedModels
from ..operator_config import AnomalyOperatorConfig
from .anomaly_dataset import AnomalyDatasets
from .automlx import AutoMLXOperatorModel
from .autots import AutoTSOperatorModel

# from .tods import TODSOperatorModel
from .base_model import AnomalyOperatorBaseModel
from .isolationforest import IsolationForestOperatorModel
from .oneclasssvm import OneClassSVMOperatorModel
from .randomcutforest import RandomCutForestOperatorModel
from ..const import NonTimeADSupportedModels, SupportedModels
from ..operator_config import AnomalyOperatorConfig


class UnSupportedModelError(Exception):
Expand Down Expand Up @@ -45,9 +41,14 @@ class AnomalyOperatorModelFactory:
"""

# Lookup from model identifier to the operator model class that implements it.
_MAP = {
    SupportedModels.AutoMLX: AutoMLXOperatorModel,
    # SupportedModels.TODS: TODSOperatorModel,
    # Every identifier below is served by the same autots-backed class.
    **dict.fromkeys(
        (
            SupportedModels.AutoTS,
            SupportedModels.IQR,
            SupportedModels.LOF,
            SupportedModels.ISOLATIONFOREST,
            SupportedModels.ZSCORE,
            SupportedModels.ROLLING_ZSCORE,
            SupportedModels.EE,
            SupportedModels.MAD,
        ),
        AutoTSOperatorModel,
    ),
}

_NonTime_MAP = {
Expand Down
10 changes: 8 additions & 2 deletions ads/opctl/operator/lowcode/anomaly/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,21 @@ spec:
- oneclasssvm
- isolationforest
- randomcutforest
- iqr
- lof
- zscore
- rolling_zscore
- mad
- ee
meta:
description: "The model to be used for anomaly detection"

contamination:
required: false
default: 0.1
default: 0.01
type: float
meta:
description: "Fraction of training dataset corresponding to anomalies (between 0.0 and 0.5)"
description: "The proportion of outliers in the data set. The contamination should be in the range (0, 0.5]"

model_kwargs:
type: dict
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ The Science of Anomaly Detection
Anomaly Detection comes in many forms. We will go through some of these and give guidance as to whether this Operator is going to be helpful for each use case.

* Constructive v Destructive v Pre-Processing: This Operator focuses on the Constructive and Pre-Processing use cases. Destructive can work, but more specific parameters may be required.
* Supervised v Semi-Supervised v Unsupervised: All 3 of these approaches are supported by AutoMLX. AutoTS supports only Unsupervised at this time.
* Time Series. This Operator requires time-series data.
* The operator currently supports only unsupervised learning and works with both time-series and non-time-series data.


Data Parameterization
Expand Down Expand Up @@ -51,40 +50,3 @@ Data Parameterization
datetime_column:
name: ds
target_column: y


Model Parameterization
----------------------

**Specify Model Type**

Sometimes users will know which models they want to use. When users know this in advance, they can specify using the ``model_kwargs`` dictionary. In the following example, we will instruct the model to *only* use the ``IsolationForestOD`` model.

.. code-block:: yaml

kind: operator
type: anomaly
version: v1
spec:
model: automlx
model_kwargs:
model_list:
- IsolationForestOD
search_space:
IsolationForestOD:
n_estimators:
range: [10, 50]
type: 'discrete'


AutoTS offers the same extensibility:

.. code-block:: yaml

kind: operator
type: anomaly
version: v1
spec:
model: autots
model_kwargs:
method: IQR
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ If you have additional variables that you think might be related, then you shoul

**Auto Model Selection**

Operators users don't need to know anything about the underlying models in order to use them. By default we set ``model: auto``. However, some users want more control over the modeling parameters. These users can set the ``model`` parameter to either ``autots`` or ``automlx`` and then pass parameters directly into ``model_kwargs``. See :doc:`Advanced Examples <./advanced_use_cases>`
Operator users don't need to know anything about the underlying models in order to use them. By default we set ``model: auto``. However, some users want more control over the modeling parameters. These users can set the ``model`` parameter to ``isolationforest``, ``lof``, ``ee``, ``zscore``, ``rolling_zscore``, ``mad``, ``minmax``, ``iqr`` or ``autots`` and then pass parameters directly into ``model_kwargs``. See :doc:`Advanced Examples <./advanced_use_cases>`

**Anomaly Detection Documentation**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,7 @@ The yaml can also be maximally stated as follows:
datetime_column:
format: "%d/%m/%y"
name: Date
model: automlx
model_kwargs:
time_budget: 100
model: ee
preprocessing: true
generate_metrics: true
generate_report: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ Here is an example anomaly.yaml with every parameter specified:
name: Date
input_data:
url: data.csv
model: auto
model: isolationforest
contamination: 0.005
target_column: target
target_category_columns: ['series']


* **Kind**: The yaml file always starts with ``kind: operator``. There are many other kinds of yaml files that can be run by ``ads opctl``, so we need to specify this is an operator.
Expand All @@ -39,7 +41,8 @@ Here is an example anomaly.yaml with every parameter specified:
* **output_directory**: (optional) This dictionary contains the details for where to put the output artifacts. The directory need not exist, but must be accessible by the Operator during runtime.
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/subfolder/``.
* **kwargs**: Insert any other args for pandas to load the data (``format``, ``options``, etc.) See full list in ``YAML Schema`` section.
* **model**: (optional) The name of the model framework you want to use. Defaults to "auto". Other options are: ``autots``, and ``auto``.
* **model**: (optional) The name of the model framework you want to use. Defaults to ``auto``. Other options are: ``autots``, ``iqr``, ``lof``, ``zscore``, ``rolling_zscore``, ``isolationforest``, ``mad`` and ``ee``.
* **contamination**: (optional) The proportion of outliers in the data set. The contamination should be in the range (0, 0.5]. This parameter is supported only by specific methods, i.e. ``isolationforest``, ``lof`` and ``ee`` (and by ``autots``, which runs ``isolationforest``). Setting it together with any other model raises an error.
* **model_kwargs**: (optional) This kwargs dict passes straight through to the model framework. If you want to take direct control of the modeling, this is the best way.
* **test_data**: (optional) This dictionary contains the details for how to read the test data. Test data should contain every datetime value of the input_data, (optionally) all of the series from target_category_columns, and a column titled "anomaly" with either a 1 (non-anomalous) or 0 (anomalous).
* **url**: Insert the uri for the dataset if it's on object storage or Data Lake using the URI pattern ``oci://<bucket>@<namespace>/path/to/data.csv``.
Expand Down
7 changes: 3 additions & 4 deletions tests/operators/anomaly/test_anomaly_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from ads.opctl.operator.lowcode.anomaly.const import NonTimeADSupportedModels
from ads.opctl.operator.lowcode.anomaly.const import NonTimeADSupportedModels, SupportedModels
import yaml
import subprocess
import pandas as pd
Expand All @@ -16,8 +16,7 @@
from datetime import datetime
from ads.opctl.operator.cmd import run


# Model names exercised by the tests in this module; "automlx" is left out.
MODELS = [
    "autots",
    "iqr",
    "lof",
    "zscore",
    "rolling_zscore",
    "mad",
    "ee",
    "isolationforest",
]

# Mandatory YAML parameters
TEMPLATE_YAML = {
Expand Down Expand Up @@ -218,7 +217,7 @@ def test_load_datasets(model, data_dict):
yaml_i = deepcopy(TEMPLATE_YAML)
yaml_i["spec"]["model"] = model
yaml_i["spec"]["input_data"]["url"] = data_dict["url"]
if model in NonTimeADSupportedModels.values():
if model in set(NonTimeADSupportedModels.values()) - set(SupportedModels.values()):
del yaml_i["spec"]["datetime_column"]
else:
yaml_i["spec"]["datetime_column"]["name"] = data_dict["dt_col"]
Expand Down