Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
This file documents the changes to the features for supported feature versions.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Fixed
- `features.voltage_features_set` returns features by categories, sorted as the pydantic model definitions

## [0.2.2] - 2025-09-25

### Added
Expand Down
47 changes: 34 additions & 13 deletions examples/train_region_predictor_gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,24 @@

import sklearn.metrics
from xgboost import XGBClassifier # pip install xgboost # https://xgboost.readthedocs.io/en/stable/prediction.html

import xgboost
import iblutil.numerical
import ephysatlas.anatomy
import ephysatlas.data
import ephysatlas.fixtures
import ephysatlas.regionclassifier

VINTAGE = '2024_W50'
VINTAGE = '2025_W28'
path_features = Path(f'/Users/olivier/Documents/datadisk/ephys-atlas-decoding/features/{VINTAGE}') # mac
path_features = Path(f'/mnt/s0/ephys-atlas-decoding/features/{VINTAGE}') # parede
path_features = Path(f'/datadisk/Data/paper-ephys-atlas/ephys-atlas-decoding/features/{VINTAGE}') # ferret
PROJECT = 'ea_active'
VINTAGE = '2025_W39'
LOWQ = ephysatlas.fixtures.misaligned_pids

root_path_features = Path(f'/datadisk/Data/paper-ephys-atlas/ephys-atlas-decoding/features') # ferret
path_features = root_path_features.joinpath(PROJECT, VINTAGE, 'agg_full')

if not path_features.exists():
from one.api import ONE
one = ONE()
ephysatlas.data.download_tables(path_features.parent, label=VINTAGE, one=one)
LOWQ = ephysatlas.fixtures.misaligned_pids
ephysatlas.data.download_tables(path_features, label=VINTAGE, one=one)

brain_atlas = ephysatlas.anatomy.ClassifierAtlas()

Expand All @@ -42,6 +42,26 @@
# "micro-manipulator",
]
x_list = ephysatlas.features.voltage_features_set(FEATURE_SET)
# x_list = list(set(x_list) - set(
# ['aperiodic_exponent',
# 'aperiodic_offset',
# 'decay_fit_error',
# 'decay_fit_r_squared',
# 'decay_n_peaks',
# 'psd_residual_alpha',
# 'psd_residual_beta',
# 'psd_residual_delta',
# 'psd_residual_gamma',
# 'psd_residual_lfp',
# 'psd_residual_theta']))


# x_list = list(set(x_list) - set(
# ['decay_fit_error',
# 'decay_fit_r_squared'
# ]))


x_list.append("outside")

TRAIN_LABEL = "Cosmos_id" # ['Beryl_id', 'Cosmos_id']
Expand All @@ -56,10 +76,10 @@ def train(test_idx, fold_label=''):
train_idx = ~test_idx
print(f"{fold_label}: {df_features.shape[0]} channels", f'training set {np.sum(test_idx) / test_idx.size}')
df_features.loc[train_idx, :].groupby(TRAIN_LABEL).count()
x_train = df_features.loc[train_idx, x_list].values
x_test = df_features.loc[test_idx, x_list].values
y_train = df_features.loc[train_idx, TRAIN_LABEL].values
y_test = df_features.loc[test_idx, TRAIN_LABEL].values
x_train = df_features.loc[train_idx, x_list].values.astype(float)
x_test = df_features.loc[test_idx, x_list].values.astype(float)
y_train = df_features.loc[train_idx, TRAIN_LABEL].values.astype(float)
y_test = df_features.loc[test_idx, TRAIN_LABEL].values.astype(float)
df_test = df_features.loc[test_idx, :].copy()
classes = np.unique(df_features.loc[train_idx, TRAIN_LABEL])

Expand All @@ -82,6 +102,7 @@ def train(test_idx, fold_label=''):
print(f"{fold_label} Accuracy: {accuracy}")

np.testing.assert_array_equal(classes, rids)

return classifier.predict_proba(x_test), classifier, accuracy, confusion_matrix


Expand Down Expand Up @@ -124,7 +145,7 @@ def train(test_idx, fold_label=''):


accuracy = sklearn.metrics.accuracy_score(df_features[TRAIN_LABEL].values, df_predictions['prediction'].values.astype(int))
sklearn.metrics.ConfusionMatrixDisplay.from_predictions(df_features[TRAIN_LABEL].values, df_predictions['prediction'].values.astype(int), normalize='true', cmap='Blues')
sklearn.metrics.ConfusionMatrixDisplay.from_predictions(df_features[TRAIN_LABEL].values, df_predictions['prediction'].values.astype(int), normalize='true', cmap='Blues', im_kw=dict(vmax=.75))
_, classifier, _, _ = train(test_idx=np.zeros(df_features.shape[0], dtype=bool))
meta = dict(
RANDOM_SEED=rs,
Expand Down
25 changes: 0 additions & 25 deletions requirements.txt

This file was deleted.

9 changes: 9 additions & 0 deletions src/ephysatlas/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,15 @@ def read_features_from_disk(
if strict:
df_features = pd.DataFrame(ephysatlas.features.ModelRawFeatures(df_features))

ialpha_bad = np.logical_or(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this filtering be done here?
Or should we do it before hand when the feature for individual snippet is calculated. And replace them with nans, and then total variation filter will take care of replacing them with actual values. I think we will get different results from these approaches.

df_features['alpha_mean'].values >= (1e3 * np.nanmedian(df_features['alpha_mean'])),
df_features['alpha_std'].values >= 1e3 * np.nanmedian(df_features['alpha_std'])
)
# then we join with the channel information to get coordinates and anatomical information
print(f"Number of channels with bad alpha: {np.sum(ialpha_bad)}")
df_features.loc[ialpha_bad, 'alpha_mean'] = np.nanmedian(df_features['alpha_mean'])
df_features.loc[ialpha_bad, 'alpha_std'] = np.nanmedian(df_features['alpha_std'])

return df_features


Expand Down
50 changes: 12 additions & 38 deletions src/ephysatlas/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,24 +284,23 @@ class ModelLfFeatures(BaseChannelFeatures):
rms_lf: Series[float] = pa.Field(
coerce=True, metadata={"transform": lambda x: 20 * np.log10(x)}
)
psd_lfp: Series[float] = pa.Field(coerce=True)
psd_delta: Series[float] = pa.Field(coerce=True)
psd_theta: Series[float] = pa.Field(coerce=True)
psd_alpha: Series[float] = pa.Field(coerce=True)
psd_beta: Series[float] = pa.Field(coerce=True)
psd_gamma: Series[float] = pa.Field(coerce=True)
psd_lfp: Series[float] = pa.Field(coerce=True)
aperiodic_offset: Optional[Series[float]] = pa.Field(coerce=True)
aperiodic_exponent: Optional[Series[float]] = pa.Field(coerce=True)
decay_fit_error: Optional[Series[float]] = pa.Field(coerce=True)
decay_fit_r_squared: Optional[Series[float]] = pa.Field(coerce=True)
decay_n_peaks: Optional[Series[int]] = pa.Field(coerce=True)
psd_residual_lfp: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)
psd_residual_delta: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)
psd_residual_theta: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)
psd_residual_alpha: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)
psd_residual_beta: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)
psd_residual_gamma: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)
psd_residual_lfp: Optional[Series[float]] = pa.Field(coerce=True, nullable=True)

aperiodic_offset: Optional[Series[float]] = pa.Field(coerce=True)
aperiodic_exponent: Optional[Series[float]] = pa.Field(coerce=True)
decay_fit_error: Optional[Series[float]] = pa.Field(coerce=True)
decay_fit_r_squared: Optional[Series[float]] = pa.Field(coerce=True)
decay_n_peaks: Optional[Series[int]] = pa.Field(coerce=True)

class ModelCsdFeatures(BaseChannelFeatures):
"""Schema for current source density features.
Expand Down Expand Up @@ -491,40 +490,15 @@ def voltage_features_set(features_list=FEATURES_LIST):
for feature_group in features_list:
match feature_group:
case "raw_ap":
x_list += sorted(
list(
set(ModelApFeatures.to_schema().columns.keys())
- set(["channel"])
)
)
x_list += list(ModelApFeatures.to_schema().columns.keys())
case "raw_lf":
x_list += sorted(
list(
set(ModelLfFeatures.to_schema().columns.keys())
- set(["channel"])
)
)
x_list += list(ModelLfFeatures.to_schema().columns.keys())
case "raw_lf_csd":
x_list += sorted(
list(
set(ModelCsdFeatures.to_schema().columns.keys())
- set(["channel"])
)
)
x_list += list(ModelCsdFeatures.to_schema().columns.keys())
case "waveforms":
x_list += sorted(
list(
set(ModelSpikeFeatures.to_schema().columns.keys())
- set(["channel"])
)
)
x_list += list(ModelSpikeFeatures.to_schema().columns.keys())
case "micro-manipulator":
x_list += sorted(
list(
set(ModelHistologyPlanned.to_schema().columns.keys())
- set(["channel"])
)
)
x_list += list(ModelHistologyPlanned.to_schema().columns.keys())
return x_list


Expand Down