simon-p-2000 · simon-p-2000 · Mar 12, 2025 · Mar 12, 2025 · Mar 12, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,6 @@
 ## [UNRELEASED] - YYYY-MM-DD
+## Added
+- Add support for activity counts feature ([#262](https://github.com/cbrnr/sleepecg/pull/262) by [Simon Pusterhofer](https://github.com/simon-p-2000))
 
 ## [0.5.9] - 2025-02-01
 ### Added

diff --git a/docs/feature_extraction.md b/docs/feature_extraction.md
@@ -1,9 +1,12 @@
 # Feature extraction
 
 ## Heart rate variability features
+
 Features are based on standards of heart rate variability (HRV) measurement and interpretation described in [Task Force of the European Society of Cardiology (1996)](https://doi.org/10.1161/01.CIR.93.5.1043) and [Shaffer & Ginsberg (2017)](https://doi.org/10.3389/fpubh.2017.00258).
 
+
 ### Time domain
+
 Group identifier: `hrv-time`
 
 All time domain HRV features are either derived from normal-to-normal (NN) intervals, from successive differences between NN intervals (SD), or from the [Poincaré plot (PP)](https://en.wikipedia.org/wiki/Poincar%C3%A9_plot).
@@ -41,7 +44,9 @@ All time domain HRV features are either derived from normal-to-normal (NN) inter
 |`CSI`|cardiac sympathetic index|PP|
 |`CVI`|cardiac vagal index|PP|
 
+
 ### Frequency domain
+
 Group identifier: `hrv-frequency`
 
 For calculating frequency domain HRV features, the RR time series is resampled at regular intervals, after which the power spectral density (PSD) is estimated using [Welch's method](https://en.wikipedia.org/wiki/Welch%27s_method).
@@ -58,6 +63,7 @@ For calculating frequency domain HRV features, the RR time series is resampled a
 
 
 ## Metadata features
+
 Group identifier: `metadata`
 
 |Feature|Description|
@@ -66,3 +72,12 @@ Group identifier: `metadata`
 |`age`|age of the subject in years|
 |`gender`|`0` (female) or `1` (male)|
 |`weight`|weight of the subject in kg|
+
+
+## Actigraphy features
+
+Group identifier: `actigraphy`
+
+| Feature           | Description                                                                                            |
+|-------------------|--------------------------------------------------------------------------------------------------------|
+| `activity_counts` | Philips Actiwatch proprietary metric to quantify amount of patient movement measured via accelerometry |
diff --git a/src/sleepecg/io/sleep_readers.py b/src/sleepecg/io/sleep_readers.py
@@ -493,12 +493,13 @@ def read_mesa(
                 activity_counts = np.array(activity_counts)
 
                 diff = len(activity_counts) - len(parsed_xml.sleep_stages)
-                if np.abs(diff) > 2:
+
+                if abs(diff) > 2:
                     print(f"Skipping {record_id} due to invalid activity counts.")
                     continue
-                elif 0 < diff <= 2:
+                elif diff > 0:
                     activity_counts = activity_counts[:-diff]
-                elif 0 < diff * -1 <= 2:
+                elif diff < 0:
                     activity_counts = np.append(activity_counts, activity_counts[diff:])
 
                 activity_counts[activity_counts == ""] = "0"

diff --git a/tests/test_sleep_readers.py b/tests/test_sleep_readers.py
@@ -24,24 +24,26 @@ def _dummy_nsrr_overlap(filename: str, mesa_ids: list[int]):
             csv.write(f"{mesa_ids[i][-1]},1,20:30:00,20:29:59\n")
 
 
-def _dummy_nsrr_actigraphy(filename: str, mesa_id: str):
+def _dummy_nsrr_actigraphy(filename: str, mesa_id: str, hours: float):
     """Create dummy actigraphy file with four usable activity counts."""
     base_time = datetime.datetime(2024, 1, 1, 20, 30, 0)
-
+    # hours * 3600 / 30 second epoch, additional 20 counts for safety
+    number_activity_counts = int(hours * 120) + 20
     linetimes = [
         (base_time + datetime.timedelta(seconds=30 * i)).strftime("%H:%M:%S")
-        for i in range(10)
+        for i in range(number_activity_counts)
     ]
 
     with open(filename, "w") as csv:
         csv.write("mesaid,line,linetime,activity\n")
-        for i in range(10):
+        for i in range(number_activity_counts):
             csv.write(f"{mesa_id[-1]},{1 + i},{linetimes[i]},10\n")
 
 
-def _dummy_nsrr_actigraphy_cached(filename: str):
+def _dummy_nsrr_actigraphy_cached(filename: str, hours: float):
     """Create dummy npy file that resembles cached activity counts."""
-    activity_counts = np.array([10, 10, 10, 10, 10, 10])
+    number_activity_counts = int(hours * 120)
+    activity_counts = np.array([10 for i in range(number_activity_counts)])
     np.save(filename, activity_counts)
 
 
@@ -54,7 +56,6 @@ def _dummy_nsrr_edf(filename: str, hours: float, ecg_channel: str):
 
 def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
     EPOCH_LENGTH = 30
-    RECORDING_DURATION = 154.0
     STAGES = [
         "Wake|0",
         "Stage 1 sleep|1",
@@ -66,7 +67,7 @@ def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
     ]
 
     rng = np.random.default_rng(random_state)
-
+    record_duration = hours * 60 * 60
     with open(filename, "w") as xml_file:
         xml_file.write(
             '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
@@ -76,16 +77,16 @@ def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
             "<ScoredEvent>\n"
             "<EventType/>\n"
             "<EventConcept>Recording Start Time</EventConcept>\n"
-            f"<Duration>{RECORDING_DURATION}</Duration>\n"
+            f"<Duration>{record_duration}</Duration>\n"
             "<ClockTime>01.01.85 20.29.59</ClockTime>\n"
             "</ScoredEvent>\n",
         )
-        record_duration = hours * 60 * 60
         start = 0
-        while True:
-            if start > record_duration:
-                break
-            epoch_duration = rng.choice(np.arange(4, 21)) * EPOCH_LENGTH
+        while start < record_duration:
+            # choose a candidate epoch duration in seconds.
+            epoch_duration_candidate = rng.choice(np.arange(4, 21)) * EPOCH_LENGTH
+            # use the remaining time if the candidate overshoots the record duration
+            epoch_duration = min(epoch_duration_candidate, record_duration - start)
             stage = rng.choice(STAGES)
             xml_file.write(
                 "<ScoredEvent>\n"
@@ -134,9 +135,11 @@ def _create_dummy_mesa(
         _dummy_nsrr_edf(f"{edf_dir}/{record_id}.edf", hours, ecg_channel="EKG")
         _dummy_nsrr_xml(f"{annotations_dir}/{record_id}-nsrr.xml", hours, random_state)
         if actigraphy:
-            _dummy_nsrr_actigraphy(f"{activity_dir}/{record_id}.csv", mesa_id=record_id)
+            _dummy_nsrr_actigraphy(
+                f"{activity_dir}/{record_id}.csv", mesa_id=record_id, hours=hours
+            )
             _dummy_nsrr_actigraphy_cached(
-                f"{activity_counts_dir}/{record_id}-activity-counts.npy"
+                f"{activity_counts_dir}/{record_id}-activity-counts.npy", hours
             )
             record_ids.append(record_id)
 
@@ -213,10 +216,12 @@ def test_read_mesa_actigraphy(tmp_path):
 
     assert len(records) == 2
 
-    for rec in records:
+    for i, rec in enumerate(records):
         assert rec.sleep_stage_duration == 30
         assert set(rec.sleep_stages) - valid_stages == set()
-        assert len(rec.activity_counts) == 4
+        # multiply with 3600 to convert duration (hours) to seconds, divide by 30 (epoch
+        # length for this test)
+        assert len(rec.activity_counts) == int(durations[i] * 120)
         assert Path(
             f"{tmp_path}/mesa/preprocessed/activity_counts/{rec.id}-activity-counts.npy"
         ).exists()
@@ -239,10 +244,10 @@ def test_read_mesa_actigraphy_cached(tmp_path):
 
     assert len(records) == 2
 
-    for rec in records:
+    for i, rec in enumerate(records):
         assert rec.sleep_stage_duration == 30
         assert set(rec.sleep_stages) - valid_stages == set()
-        assert len(rec.activity_counts) == 6
+        assert len(rec.activity_counts) == int(durations[i] * 120)
 
 
 def test_read_shhs(tmp_path):