Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
## [UNRELEASED] - YYYY-MM-DD
### Added
- Add support for activity counts feature ([#262](https://github.com/cbrnr/sleepecg/pull/262) by [Simon Pusterhofer](https://github.com/simon-p-2000))

## [0.5.9] - 2025-02-01
### Added
Expand Down
15 changes: 15 additions & 0 deletions docs/feature_extraction.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Feature extraction

## Heart rate variability features

Features are based on standards of heart rate variability (HRV) measurement and interpretation described in [Task Force of the European Society of Cardiology (1996)](https://doi.org/10.1161/01.CIR.93.5.1043) and [Shaffer & Ginsberg (2017)](https://doi.org/10.3389/fpubh.2017.00258).


### Time domain

Group identifier: `hrv-time`

All time domain HRV features are either derived from normal-to-normal (NN) intervals, from successive differences between NN intervals (SD), or from the [Poincaré plot (PP)](https://en.wikipedia.org/wiki/Poincar%C3%A9_plot).
Expand Down Expand Up @@ -41,7 +44,9 @@ All time domain HRV features are either derived from normal-to-normal (NN) inter
|`CSI`|cardiac sympathetic index|PP|
|`CVI`|cardiac vagal index|PP|


### Frequency domain

Group identifier: `hrv-frequency`

For calculating frequency domain HRV features, the RR time series is resampled at regular intervals, after which the power spectral density (PSD) is estimated using [Welch's method](https://en.wikipedia.org/wiki/Welch%27s_method).
Expand All @@ -58,6 +63,7 @@ For calculating frequency domain HRV features, the RR time series is resampled a


## Metadata features

Group identifier: `metadata`

|Feature|Description|
Expand All @@ -66,3 +72,12 @@ Group identifier: `metadata`
|`age`|age of the subject in years|
|`gender`|`0` (female) or `1` (male)|
|`weight`|weight of the subject in kg|


## Actigraphy features

Group identifier: `actigraphy`

| Feature | Description |
|-------------------|--------------------------------------------------------------------------------------------------------|
| `activity_counts` | Philips Actiwatch proprietary metric that quantifies the amount of patient movement via accelerometry  |
7 changes: 4 additions & 3 deletions src/sleepecg/io/sleep_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,12 +493,13 @@ def read_mesa(
activity_counts = np.array(activity_counts)

diff = len(activity_counts) - len(parsed_xml.sleep_stages)
if np.abs(diff) > 2:

if abs(diff) > 2:
print(f"Skipping {record_id} due to invalid activity counts.")
continue
elif 0 < diff <= 2:
elif diff > 0:
activity_counts = activity_counts[:-diff]
elif 0 < diff * -1 <= 2:
elif diff < 0:
activity_counts = np.append(activity_counts, activity_counts[diff:])

activity_counts[activity_counts == ""] = "0"
Expand Down
45 changes: 25 additions & 20 deletions tests/test_sleep_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,26 @@ def _dummy_nsrr_overlap(filename: str, mesa_ids: list[int]):
csv.write(f"{mesa_ids[i][-1]},1,20:30:00,20:29:59\n")


def _dummy_nsrr_actigraphy(filename: str, mesa_id: str):
def _dummy_nsrr_actigraphy(filename: str, mesa_id: str, hours: float):
"""Create dummy actigraphy file with four usable activity counts."""
base_time = datetime.datetime(2024, 1, 1, 20, 30, 0)

# hours * 3600 / 30 second epoch, additional 20 counts for safety
number_activity_counts = int(hours * 120) + 20
linetimes = [
(base_time + datetime.timedelta(seconds=30 * i)).strftime("%H:%M:%S")
for i in range(10)
for i in range(number_activity_counts)
]

with open(filename, "w") as csv:
csv.write("mesaid,line,linetime,activity\n")
for i in range(10):
for i in range(number_activity_counts):
csv.write(f"{mesa_id[-1]},{1 + i},{linetimes[i]},10\n")


def _dummy_nsrr_actigraphy_cached(filename: str):
def _dummy_nsrr_actigraphy_cached(filename: str, hours: float):
"""Create dummy npy file that resembles cached activity counts."""
activity_counts = np.array([10, 10, 10, 10, 10, 10])
number_activity_counts = int(hours * 120)
activity_counts = np.array([10 for i in range(number_activity_counts)])
np.save(filename, activity_counts)


Expand All @@ -54,7 +56,6 @@ def _dummy_nsrr_edf(filename: str, hours: float, ecg_channel: str):

def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
EPOCH_LENGTH = 30
RECORDING_DURATION = 154.0
STAGES = [
"Wake|0",
"Stage 1 sleep|1",
Expand All @@ -66,7 +67,7 @@ def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
]

rng = np.random.default_rng(random_state)

record_duration = hours * 60 * 60
with open(filename, "w") as xml_file:
xml_file.write(
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
Expand All @@ -76,16 +77,16 @@ def _dummy_nsrr_xml(filename: str, hours: float, random_state: int):
"<ScoredEvent>\n"
"<EventType/>\n"
"<EventConcept>Recording Start Time</EventConcept>\n"
f"<Duration>{RECORDING_DURATION}</Duration>\n"
f"<Duration>{record_duration}</Duration>\n"
"<ClockTime>01.01.85 20.29.59</ClockTime>\n"
"</ScoredEvent>\n",
)
record_duration = hours * 60 * 60
start = 0
while True:
if start > record_duration:
break
epoch_duration = rng.choice(np.arange(4, 21)) * EPOCH_LENGTH
while start < record_duration:
# choose a candidate epoch duration in seconds.
epoch_duration_candidate = rng.choice(np.arange(4, 21)) * EPOCH_LENGTH
# use the remaining time if the candidate overshoots the record duration
epoch_duration = min(epoch_duration_candidate, record_duration - start)
stage = rng.choice(STAGES)
xml_file.write(
"<ScoredEvent>\n"
Expand Down Expand Up @@ -134,9 +135,11 @@ def _create_dummy_mesa(
_dummy_nsrr_edf(f"{edf_dir}/{record_id}.edf", hours, ecg_channel="EKG")
_dummy_nsrr_xml(f"{annotations_dir}/{record_id}-nsrr.xml", hours, random_state)
if actigraphy:
_dummy_nsrr_actigraphy(f"{activity_dir}/{record_id}.csv", mesa_id=record_id)
_dummy_nsrr_actigraphy(
f"{activity_dir}/{record_id}.csv", mesa_id=record_id, hours=hours
)
_dummy_nsrr_actigraphy_cached(
f"{activity_counts_dir}/{record_id}-activity-counts.npy"
f"{activity_counts_dir}/{record_id}-activity-counts.npy", hours
)
record_ids.append(record_id)

Expand Down Expand Up @@ -213,10 +216,12 @@ def test_read_mesa_actigraphy(tmp_path):

assert len(records) == 2

for rec in records:
for i, rec in enumerate(records):
assert rec.sleep_stage_duration == 30
assert set(rec.sleep_stages) - valid_stages == set()
assert len(rec.activity_counts) == 4
# multiply with 3600 to convert duration (hours) to seconds, divide by 30 (epoch
# length for this test)
assert len(rec.activity_counts) == int(durations[i] * 120)
assert Path(
f"{tmp_path}/mesa/preprocessed/activity_counts/{rec.id}-activity-counts.npy"
).exists()
Expand All @@ -239,10 +244,10 @@ def test_read_mesa_actigraphy_cached(tmp_path):

assert len(records) == 2

for rec in records:
for i, rec in enumerate(records):
assert rec.sleep_stage_duration == 30
assert set(rec.sleep_stages) - valid_stages == set()
assert len(rec.activity_counts) == 6
assert len(rec.activity_counts) == int(durations[i] * 120)


def test_read_shhs(tmp_path):
Expand Down
Loading