diff --git a/nextflow/configs/profiles/sumner2.config b/nextflow/configs/profiles/sumner2.config index 49b11d7..7b1dfdd 100644 --- a/nextflow/configs/profiles/sumner2.config +++ b/nextflow/configs/profiles/sumner2.config @@ -29,7 +29,7 @@ params { tracking_code_dir = "/kumar_lab_models/mouse-tracking-runtime/" gait_code_dir = "/gait-analysis/" vfi_code_dir = "/vfi/Code/" - support_code_dir = "/mouse-tracking-runtime/support_code/" + support_code_dir = "/workspace/support_code/" heuristic_classifier_folder = "/opt/JABS-postprocess/src/jabs_postprocess/heuristic_classifiers/" filter_processed = false model_dir = "/projects/kumar-lab/multimouse-pipeline/nextflow-artifacts/neural_net_models/" @@ -90,7 +90,7 @@ params { ], ] - heuristic_classifiers = ["corner", "corner_facing", "freeze", "locomotion", "periphery", "wall_facing"] + heuristic_classifiers = ["corner", "corner_facing", "freeze", "locomotion", "periphery", "wall_facing", "locomotion_corner", "locomotion_periphery"] // Number of 5-minute bins for transforming summary tables into bins // 1 = 5 minutes, 4 = 20 minutes, etc. @@ -182,7 +182,7 @@ process { container = "/projects/kumar-lab/meta/images/JABS-behavior-classifier/headless/v0.36.1/latest.sif" } withLabel: "jabs_postprocess" { - container = "/projects/kumar-lab/meta/images/JABS-postprocess/jabs-postprocess/v0.4.2/latest.sif" + container = "/projects/kumar-lab/meta/images/JABS-postprocess/jabs-postprocess/v0.5.1/latest.sif" } withLabel: "jabs_table_convert" { container = "/projects/kumar-lab/meta/images/mouse-tracking-runtime/RBase/v0.1.4/latest.sif" diff --git a/nextflow/modules/jabs_classifiers.nf b/nextflow/modules/jabs_classifiers.nf index a08ae5f..d53520b 100644 --- a/nextflow/modules/jabs_classifiers.nf +++ b/nextflow/modules/jabs_classifiers.nf @@ -171,7 +171,7 @@ process PREDICT_HEURISTICS { * @return features The generated feature file. */ process BEHAVIOR_TABLE_TO_FEATURES { - label "jabs_table_convert" + label "tracking" label "cpu" label "r_jabs_table_convert" @@ -183,7 +183,7 @@ process BEHAVIOR_TABLE_TO_FEATURES { script: """ - Rscript ${params.support_code_dir}behavior_summaries.R -f ${in_summary_table} -b ${bin_size} -o "${in_summary_table.baseName}_features_${bin_size}.csv" + python3 ${params.support_code_dir}/behavior_summaries.py -f ${in_summary_table} -b ${bin_size} -o "${in_summary_table.baseName}_features_${bin_size}.csv" """ } diff --git a/support_code/behavior_summaries.py b/support_code/behavior_summaries.py index 6621767..a24a26f 100644 --- a/support_code/behavior_summaries.py +++ b/support_code/behavior_summaries.py @@ -5,8 +5,10 @@ from JABS postprocessing summary tables, calculating metrics like time spent in behaviors and distances traveled. """ + import argparse +import numpy as np import pandas as pd @@ -100,6 +102,14 @@ def get_columns_to_exclude(behavior: str) -> list: "bout_behavior", "not_behavior_dist", "behavior_dist", + "behavior_dist_threshold", + "behavior_dist_seg", + "avg_bout_duration", + "_stats_sample_count", + "bout_duration_std", + "bout_duration_var", + "latency_to_first_prediction", + "latency_to_last_prediction", ] return [f"{behavior}_{suffix}" for suffix in suffixes] @@ -129,10 +139,11 @@ def aggregate_data_by_bin_size( time_behavior_col = f"{behavior}_time_behavior" time_not_behavior_col = f"{behavior}_time_not_behavior" behavior_dist_col = f"{behavior}_behavior_dist" + behavior_bout_col = f"{behavior}_bout_behavior" # Calculate time spent in behavior # TODO: Do we need to make `5` a configurable parameter? - aggregated[f"bin_avg_{bin_size*5}.{behavior}_time_secs"] = ( + aggregated[f"bin_sum_{bin_size * 5}.{behavior}_time_secs"] = ( aggregated[time_behavior_col] / (aggregated[time_behavior_col] + aggregated[time_not_behavior_col]) * bin_size @@ -141,9 +152,40 @@ def aggregate_data_by_bin_size( # Calculate average distance (in cm) # TODO: Do we need to make `5` a configurable parameter? - aggregated[f"bin_avg_{bin_size*5}.{behavior}_distance_cm"] = aggregated[ + aggregated[f"bin_avg_{bin_size * 5}.{behavior}_distance_cm"] = aggregated[ behavior_dist_col ] / (bin_size * 5) + aggregated[f"bin_sum_{bin_size * 5}.{behavior}_distance_cm"] = aggregated[ + behavior_dist_col + ] + aggregated[f"bin_sum_{bin_size * 5}.{behavior}_distance_cm_threshold"] = aggregated[ + f"{behavior}_behavior_dist_threshold" + ] + aggregated[f"bin_sum_{bin_size * 5}.{behavior}_distance_cm_seg"] = aggregated[ + f"{behavior}_behavior_dist_seg" + ] + + # Sum up bout count + aggregated[f"bin_sum_{bin_size * 5}.{behavior}_bout_behavior"] = aggregated[ + behavior_bout_col + ] + + # Additional stats + if np.sum(aggregated[f"{behavior}__stats_sample_count"]) == 0: + aggregated[f"bin_avg_{bin_size * 5}.{behavior}_avg_bout_length"] = np.nan + else: + aggregated[f"bin_avg_{bin_size * 5}.{behavior}_avg_bout_length"] = np.average( + aggregated[f"{behavior}_avg_bout_duration"], + weights=aggregated[f"{behavior}__stats_sample_count"], + ) + # TODO: var and std need to be aggregated across bins. + # This is non-trivial because of the partial bouts and their associated weights. + aggregated[f"bin_first_{bin_size * 5}.{behavior}_latency_first_prediction"] = ( + aggregated[f"{behavior}_latency_to_first_prediction"].head(1) + ) + aggregated[f"bin_last_{bin_size * 5}.{behavior}_latency_last_prediction"] = ( + aggregated[f"{behavior}_latency_to_last_prediction"].tail(1) + ) # Reset index to make MouseID a regular column return aggregated.reset_index()