[AnomalyDetection] Add univariate trackers #33994

Merged
merged 6 commits on Feb 18, 2025
Changes from 4 commits
16 changes: 16 additions & 0 deletions sdks/python/apache_beam/ml/anomaly/univariate/__init__.py
@@ -0,0 +1,16 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
88 changes: 88 additions & 0 deletions sdks/python/apache_beam/ml/anomaly/univariate/base.py
@@ -0,0 +1,88 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import abc
from collections import deque
from enum import Enum


class BaseTracker(abc.ABC):
  """Abstract base class for all univariate trackers."""
  @abc.abstractmethod
  def push(self, x):
    """Push a new value to the tracker.

    Args:
      x: The value to be pushed.
    """
    raise NotImplementedError()

  @abc.abstractmethod
  def get(self):
    """Get the current tracking value.

    Returns:
      The current tracked value, the type of which depends on the specific
      tracker implementation.
    """
    raise NotImplementedError()


class WindowMode(Enum):
  """Enum representing the window mode for windowed trackers."""
  #: operating on all data points from the beginning.
Contributor: I assume this doesn't mean we're buffering all data points we've ever seen, right?

Contributor: I think, reading other things, the answer is yes, but it would be good to be explicit here.

Collaborator (Author): It really depends on the algorithm and on which statistic we are talking about.

For example, we don't need to store all data points to compute the mean in a landmark window: a naive way is to store only the number of data points and their running sum (a minimal sketch of this appears after this file's diff).

However, for quantiles we have to store all the data to compute the exact answer. There are approximate quantile algorithms that do not need to store all data points, but they are outside the scope of this implementation.

That's why `WindowedTracker` does not explicitly declare a list to store all data points: depending on the subclass, it may or may not need them.

  LANDMARK = 1
  #: operating on a fixed-size sliding window of recent data points.
  SLIDING = 2


class WindowedTracker(BaseTracker):
  """Abstract base class for trackers that operate on a data window.

  This class provides a foundation for trackers that maintain a window of data,
  either as a landmark window or a sliding window. It provides basic push and
  pop operations.

  Args:
    window_mode: A `WindowMode` enum specifying whether the window is
      `LANDMARK` or `SLIDING`.
    **kwargs: Keyword arguments.
      For `SLIDING` window mode, `window_size` can be specified to set the
      maximum size of the sliding window. Defaults to 100.
  """
  def __init__(self, window_mode, **kwargs):
    if window_mode == WindowMode.SLIDING:
      self._window_size = kwargs.get("window_size", 100)
      self._queue = deque(maxlen=self._window_size)
    self._n = 0
    self._window_mode = window_mode

  def push(self, x):
    """Adds a new value to the data window.

    Args:
      x: The value to be added to the window.
    """
    self._queue.append(x)

  def pop(self):
    """Removes and returns the oldest value from the data window (FIFO).

    Returns:
      The oldest value from the window.
    """
    return self._queue.popleft()
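
To make the reply in the review thread above concrete: a landmark-window mean needs only a running count and sum, with no buffer of past values. The following is an editor's illustrative sketch, not code from this PR; the class name `LandmarkMeanSketch` is hypothetical.

import math


class LandmarkMeanSketch:
  """Illustrative only: landmark-window mean with O(1) state (count + sum)."""
  def __init__(self):
    self._n = 0      # number of non-NaN values seen so far
    self._sum = 0.0  # running sum of those values

  def push(self, x):
    # Ignore NaN inputs, mirroring the NaN handling used by the trackers here.
    if not math.isnan(x):
      self._n += 1
      self._sum += x

  def get(self):
    # Match the convention of returning NaN when no valid value has been seen.
    return self._sum / self._n if self._n > 0 else float("nan")


# Example: the mean over all pushed values is available without storing them.
tracker = LandmarkMeanSketch()
for value in [1.0, 2.0, float("nan"), 3.0]:
  tracker.push(value)
print(tracker.get())  # 2.0

The `IncMeanTracker` in mean.py below applies the same idea, keeping a running count and mean rather than a count and sum.
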
141 changes: 141 additions & 0 deletions sdks/python/apache_beam/ml/anomaly/univariate/mean.py
@@ -0,0 +1,141 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Trackers for calculating mean in windowed fashion.

This module defines different types of mean trackers that operate on windows
of data. It includes:

* `SimpleSlidingMeanTracker`: Calculates mean using numpy in a sliding window.
* `IncLandmarkMeanTracker`: Incremental mean tracker in landmark window mode.
* `IncSlidingMeanTracker`: Incremental mean tracker in sliding window mode.
"""

import math
import warnings

import numpy as np

from apache_beam.ml.anomaly.univariate.base import WindowedTracker
from apache_beam.ml.anomaly.univariate.base import WindowMode


class MeanTracker(WindowedTracker):
  """Abstract base class for mean trackers.

  Currently, it does not add any specific functionality but provides a type
  hierarchy for mean trackers.
  """
  pass


class SimpleSlidingMeanTracker(MeanTracker):
  """Sliding window mean tracker that calculates mean using NumPy.

  This tracker uses NumPy's `nanmean` function to calculate the mean of the
  values currently in the sliding window. It's a simple, non-incremental
  approach.

  Args:
    window_size: The size of the sliding window.
  """
  def __init__(self, window_size):
    super().__init__(window_mode=WindowMode.SLIDING, window_size=window_size)

  def get(self):
    """Calculates and returns the mean of the current sliding window.

    Returns:
      float: The mean of the values in the current sliding window.
        Returns NaN if the window is empty.
    """
    if len(self._queue) == 0:
      return float('nan')

    with warnings.catch_warnings(record=False):
      warnings.simplefilter("ignore")
      return np.nanmean(self._queue)


class IncMeanTracker(MeanTracker):
  """Base class for incremental mean trackers.

  This class implements incremental calculation of the mean, which is more
  efficient for streaming data as it updates the mean with each new data point
  instead of recalculating from scratch.

  Args:
    window_mode: A `WindowMode` enum specifying whether the window is
      `LANDMARK` or `SLIDING`.
    **kwargs: Keyword arguments passed to the parent class constructor.
  """
  def __init__(self, window_mode, **kwargs):
    super().__init__(window_mode=window_mode, **kwargs)
    self._mean = 0

  def push(self, x):
    """Pushes a new value and updates the incremental mean.

    Args:
      x: The new value to be pushed.
    """
    if not math.isnan(x):
      self._n += 1
      delta = x - self._mean
    else:
      delta = 0

    if self._window_mode == WindowMode.SLIDING:
      if len(self._queue) >= self._window_size and \
          not math.isnan(old_x := self.pop()):
        self._n -= 1
        delta += (self._mean - old_x)

      # The window queue only exists in SLIDING mode, so only push to it here;
      # LANDMARK mode keeps just the running count and mean.
      super().push(x)

    if self._n > 0:
      self._mean += delta / self._n
    else:
      self._mean = 0

  def get(self):
    """Returns the current incremental mean.

    Returns:
      float: The current incremental mean value.
        Returns NaN if no valid (non-NaN) values have been pushed.
    """
    if self._n < 1:
      # keep it consistent with numpy
      return float("nan")
    return self._mean


class IncLandmarkMeanTracker(IncMeanTracker):
  """Landmark window mean tracker using incremental calculation."""
  def __init__(self):
    super().__init__(window_mode=WindowMode.LANDMARK)


class IncSlidingMeanTracker(IncMeanTracker):
  """Sliding window mean tracker using incremental calculation.

  Args:
    window_size: The size of the sliding window.
  """
  def __init__(self, window_size):
    super().__init__(window_mode=WindowMode.SLIDING, window_size=window_size)
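
As a quick illustration of how these trackers are meant to be driven (an editor's sketch, not part of this PR; it only assumes the two modules shown in this diff are importable and the variable names are illustrative), the NumPy-based and the incremental sliding trackers report the same window mean, while the landmark tracker averages everything seen so far:

from apache_beam.ml.anomaly.univariate.mean import IncLandmarkMeanTracker
from apache_beam.ml.anomaly.univariate.mean import IncSlidingMeanTracker
from apache_beam.ml.anomaly.univariate.mean import SimpleSlidingMeanTracker

simple = SimpleSlidingMeanTracker(window_size=3)
inc_sliding = IncSlidingMeanTracker(window_size=3)
inc_landmark = IncLandmarkMeanTracker()

for x in [1.0, 2.0, 3.0, 4.0]:
  simple.push(x)
  inc_sliding.push(x)
  inc_landmark.push(x)

print(simple.get())        # 3.0 -> mean of the last 3 values (2, 3, 4)
print(inc_sliding.get())   # 3.0 -> same result, updated incrementally
print(inc_landmark.get())  # 2.5 -> mean of all values seen (1, 2, 3, 4)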