diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae169f8a..b92b107c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,6 @@
## [UNRELEASED] - YYYY-MM-DD
+### Added
+- Add support for activity counts feature ([#262](https://github.com/cbrnr/sleepecg/pull/262) by [Simon Pusterhofer](https://github.com/simon-p-2000))
## [0.5.9] - 2025-02-01
### Added
diff --git a/docs/feature_extraction.md b/docs/feature_extraction.md
index 9f117343..f0f6c0b4 100644
--- a/docs/feature_extraction.md
+++ b/docs/feature_extraction.md
@@ -1,9 +1,12 @@
# Feature extraction
## Heart rate variability features
+
Features are based on standards of heart rate variability (HRV) measurement and interpretation described in [Task Force of the European Society of Cardiology (1996)](https://doi.org/10.1161/01.CIR.93.5.1043) and [Shaffer & Ginsberg (2017)](https://doi.org/10.3389/fpubh.2017.00258).
+
### Time domain
+
Group identifier: `hrv-time`
All time domain HRV features are either derived from normal-to-normal (NN) intervals, from successive differences between NN intervals (SD), or from the [Poincaré plot (PP)](https://en.wikipedia.org/wiki/Poincar%C3%A9_plot).
@@ -41,7 +44,9 @@ All time domain HRV features are either derived from normal-to-normal (NN) inter
|`CSI`|cardiac sympathetic index|PP|
|`CVI`|cardiac vagal index|PP|
+
### Frequency domain
+
Group identifier: `hrv-frequency`
For calculating frequency domain HRV features, the RR time series is resampled at regular intervals, after which the power spectral density (PSD) is estimated using [Welch's method](https://en.wikipedia.org/wiki/Welch%27s_method).
@@ -58,6 +63,7 @@ For calculating frequency domain HRV features, the RR time series is resampled a
## Metadata features
+
Group identifier: `metadata`
|Feature|Description|
@@ -66,3 +72,12 @@ Group identifier: `metadata`
|`age`|age of the subject in years|
|`gender`|`0` (female) or `1` (male)|
|`weight`|weight of the subject in kg|
+
+
+## Actigraphy features
+
+Group identifier: `actigraphy`
+
+| Feature | Description |
+|-------------------|--------------------------------------------------------------------------------------------------------|
+| `activity_counts` | Philips Actiwatch proprietary metric to quantify amount of patient movement measured via accelerometry |
diff --git a/src/sleepecg/io/sleep_readers.py b/src/sleepecg/io/sleep_readers.py
index 3f3eb5b0..fd1d692e 100644
--- a/src/sleepecg/io/sleep_readers.py
+++ b/src/sleepecg/io/sleep_readers.py
@@ -493,12 +493,13 @@ def read_mesa(
activity_counts = np.array(activity_counts)
diff = len(activity_counts) - len(parsed_xml.sleep_stages)
- if np.abs(diff) > 2:
+
+ if abs(diff) > 2:
print(f"Skipping {record_id} due to invalid activity counts.")
continue
- elif 0 < diff <= 2:
+ elif diff > 0:
activity_counts = activity_counts[:-diff]
- elif 0 < diff * -1 <= 2:
+ elif diff < 0:
activity_counts = np.append(activity_counts, activity_counts[diff:])
activity_counts[activity_counts == ""] = "0"
diff --git a/tests/test_sleep_readers.py b/tests/test_sleep_readers.py
index 8901c82e..fd63204e 100644
--- a/tests/test_sleep_readers.py
+++ b/tests/test_sleep_readers.py
@@ -24,24 +24,26 @@ def _dummy_nsrr_overlap(filename: str, mesa_ids: list[int]):
csv.write(f"{mesa_ids[i][-1]},1,20:30:00,20:29:59\n")
-def _dummy_nsrr_actigraphy(filename: str, mesa_id: str):
+def _dummy_nsrr_actigraphy(filename: str, mesa_id: str, hours: float):
"""Create dummy actigraphy file with four usable activity counts."""
base_time = datetime.datetime(2024, 1, 1, 20, 30, 0)
-
+ # one count per 30 s epoch (hours * 3600 / 30 = hours * 120), plus 20 spare counts
+ number_activity_counts = int(hours * 120) + 20
linetimes = [
(base_time + datetime.timedelta(seconds=30 * i)).strftime("%H:%M:%S")
- for i in range(10)
+ for i in range(number_activity_counts)
]
with open(filename, "w") as csv:
csv.write("mesaid,line,linetime,activity\n")
- for i in range(10):
+ for i in range(number_activity_counts):
csv.write(f"{mesa_id[-1]},{1 + i},{linetimes[i]},10\n")
-def _dummy_nsrr_actigraphy_cached(filename: str):
+def _dummy_nsrr_actigraphy_cached(filename: str, hours: float):
"""Create dummy npy file that resembles cached activity counts."""
- activity_counts = np.array([10, 10, 10, 10, 10, 10])
+ number_activity_counts = int(hours * 120)
+ activity_counts = np.array([10 for i in range(number_activity_counts)])
np.save(filename, activity_counts)
@@ -54,7 +56,6 @@ def _dummy_nsrr_edf(filename: str, hours: float, ecg_channel: str):
def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
EPOCH_LENGTH = 30
- RECORDING_DURATION = 154.0
STAGES = [
"Wake|0",
"Stage 1 sleep|1",
@@ -66,7 +67,7 @@ def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
]
rng = np.random.default_rng(random_state)
-
+ record_duration = hours * 60 * 60
with open(filename, "w") as xml_file:
xml_file.write(
'\n'
@@ -76,16 +77,16 @@ def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
"\n"
"\n"
"Recording Start Time\n"
- f"{RECORDING_DURATION}\n"
+ f"{record_duration}\n"
"01.01.85 20.29.59\n"
"\n",
)
- record_duration = hours * 60 * 60
start = 0
- while True:
- if start > record_duration:
- break
- epoch_duration = rng.choice(np.arange(4, 21)) * EPOCH_LENGTH
+ while start < record_duration:
+ # choose a candidate epoch duration in seconds.
+ epoch_duration_candidate = rng.choice(np.arange(4, 21)) * EPOCH_LENGTH
+ # use the remaining time if the candidate overshoots the record duration
+ epoch_duration = min(epoch_duration_candidate, record_duration - start)
stage = rng.choice(STAGES)
xml_file.write(
"\n"
@@ -134,9 +135,11 @@ def _create_dummy_mesa(
_dummy_nsrr_edf(f"{edf_dir}/{record_id}.edf", hours, ecg_channel="EKG")
_dummy_nsrr_xml(f"{annotations_dir}/{record_id}-nsrr.xml", hours, random_state)
if actigraphy:
- _dummy_nsrr_actigraphy(f"{activity_dir}/{record_id}.csv", mesa_id=record_id)
+ _dummy_nsrr_actigraphy(
+ f"{activity_dir}/{record_id}.csv", mesa_id=record_id, hours=hours
+ )
_dummy_nsrr_actigraphy_cached(
- f"{activity_counts_dir}/{record_id}-activity-counts.npy"
+ f"{activity_counts_dir}/{record_id}-activity-counts.npy", hours
)
record_ids.append(record_id)
@@ -213,10 +216,12 @@ def test_read_mesa_actigraphy(tmp_path):
assert len(records) == 2
- for rec in records:
+ for i, rec in enumerate(records):
assert rec.sleep_stage_duration == 30
assert set(rec.sleep_stages) - valid_stages == set()
- assert len(rec.activity_counts) == 4
+ # durations are in hours: multiply by 3600 to get seconds, then divide by the
+ # 30 s epoch length used in this test (3600 / 30 = 120 counts per hour)
+ assert len(rec.activity_counts) == int(durations[i] * 120)
assert Path(
f"{tmp_path}/mesa/preprocessed/activity_counts/{rec.id}-activity-counts.npy"
).exists()
@@ -239,10 +244,10 @@ def test_read_mesa_actigraphy_cached(tmp_path):
assert len(records) == 2
- for rec in records:
+ for i, rec in enumerate(records):
assert rec.sleep_stage_duration == 30
assert set(rec.sleep_stages) - valid_stages == set()
- assert len(rec.activity_counts) == 6
+ assert len(rec.activity_counts) == int(durations[i] * 120)
def test_read_shhs(tmp_path):