From dcc114a2c2dfcfaba74b268ba9ab4d9aff48bb55 Mon Sep 17 00:00:00 2001 From: Remi Gau Date: Mon, 18 Aug 2025 08:06:23 +0200 Subject: [PATCH] paper --- .codespellrc | 2 +- docs/paper/.gitignore | 3 + docs/paper/checklist.md | 79 +++++++++++++++ docs/paper/metadata.py | 81 +++++++++++++++ docs/paper/metadata.yml | 62 ++++++++++++ docs/paper/paper.Rmd | 194 ++++++++++++++++++++++++++++++++++++ docs/paper/paper.Rproj | 13 +++ docs/paper/paper.bib | 99 ++++++++++++++++++ docs/paper/requirements.txt | 1 + 9 files changed, 533 insertions(+), 1 deletion(-) create mode 100644 docs/paper/.gitignore create mode 100644 docs/paper/checklist.md create mode 100644 docs/paper/metadata.py create mode 100644 docs/paper/metadata.yml create mode 100644 docs/paper/paper.Rmd create mode 100644 docs/paper/paper.Rproj create mode 100644 docs/paper/paper.bib create mode 100644 docs/paper/requirements.txt diff --git a/.codespellrc b/.codespellrc index 82787b09..2620b156 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,4 +1,4 @@ [codespell] -skip = *.js,*.svg,*.eps,.git,env,*build,bids-examples,coverage_html,schema.json +skip = *.js,*.svg,*.eps,.git,env,*build,bids-examples,coverage_html,schema.json,docs/paper/paper.bib ignore-words-list = te,ans,nin builtin = clear,rare diff --git a/docs/paper/.gitignore b/docs/paper/.gitignore new file mode 100644 index 00000000..2a73fd1b --- /dev/null +++ b/docs/paper/.gitignore @@ -0,0 +1,3 @@ +*.html +*.pdf +*.docx diff --git a/docs/paper/checklist.md b/docs/paper/checklist.md new file mode 100644 index 00000000..8f4ebb54 --- /dev/null +++ b/docs/paper/checklist.md @@ -0,0 +1,79 @@ +## General checks + +Repository: + +Is the source code for this software available at the https://github.com/JamesPHoughton/pysd/? + +License: + +Does the repository contain a plain-text LICENSE file with the contents of an OSI approved software license? + +Contribution and authorship: + +Has the submitting author (@rogersamso) made major contributions to the software? 
Does the full list of paper authors seem appropriate and complete? + +Substantial scholarly effort: + +Does this submission meet the scope eligibility described in the JOSS guidelines? + +## Functionality + +Installation: + +Does installation proceed as outlined in the documentation? + +Functionality: + +Have the functional claims of the software been confirmed? + +Performance: + +If there are any performance claims of the software, have they been confirmed? (If there are no claims, please check off this item.) + +## Documentation + +A statement of need: + +Do the authors clearly state what problems the software is designed to solve and who the target audience is? + +Installation instructions: + +Is there a clearly-stated list of dependencies? Ideally these should be handled with an automated package management solution. + +Example usage: + +Do the authors include examples of how to use the software (ideally to solve real-world analysis problems)? + +Functionality documentation: + +Is the core functionality of the software documented to a satisfactory level (e.g., API method documentation)? + +Automated tests: + +Are there automated tests or manual steps described so that the functionality of the software can be verified? + +Community guidelines: + +Are there clear guidelines for third parties wishing to 1) Contribute to the software 2) Report issues or problems with the software 3) Seek support? + +## Software paper + +Summary: + +Has a clear description of the high-level functionality and purpose of the software for a diverse, non-specialist audience been provided? + +A statement of need: + +Does the paper have a section titled 'Statement of Need' that clearly states what problems the software is designed to solve and who the target audience is? + +State of the field: + +Do the authors describe how this software compares to other commonly-used packages?
+ +Quality of writing: + +Is the paper well written (i.e., it does not require editing for structure, language, or writing quality)? + +References: + +Is the list of references complete, and is everything cited appropriately that should be cited (e.g., papers, datasets, software)? Do references in the text use the proper citation syntax? diff --git a/docs/paper/metadata.py b/docs/paper/metadata.py new file mode 100644 index 00000000..6b2cfe28 --- /dev/null +++ b/docs/paper/metadata.py @@ -0,0 +1,81 @@ +# yml front matter for paper from CITATION.cff file + +import ruamel.yaml +from pathlib import Path +from rich import print + +from datetime import datetime + +yaml = ruamel.yaml.YAML() +yaml.indent(mapping=2, sequence=4, offset=2) + +CITATION_CFF = Path(__file__).parent.parent.parent.joinpath("CITATION.cff") + +first_author = "Gau" +sort_authors_alphabetically = True + + +def return_author_order(author_list, first_author): + """Return the order of authors in the paper.""" + author_order = [x["family-names"].strip() for x in author_list] + if sort_authors_alphabetically: + author_order = sorted(author_order) + author_order.pop(author_order.index(first_author)) + author_order.insert(0, first_author) + print(author_order) + return author_order + + +def main(): + with open(CITATION_CFF, encoding="utf8") as f: + citation = yaml.load(f) + + author_order = return_author_order(citation["authors"], first_author) + author_names = [x["family-names"].strip() for x in citation["authors"]] + + author_list = [] + affiliation_list = [] + + for this_author_name in author_order: + + author = citation["authors"][author_names.index(this_author_name)] + + this_author = { + "name": f"{author['given-names']} {author.get('family-names', '')}".strip() + } + + if author.get("orcid", None) is not None: + this_author["orcid"] = author.get("orcid").replace("https://orcid.org/", "") + + if author.get("affiliation") is not None: + + this_affiliation = author.get("affiliation") +
affiliation_list_names = [x["name"] for x in affiliation_list] + + if this_affiliation not in affiliation_list_names: + affiliation_list.append( + {"name": this_affiliation, "index": len(affiliation_list) + 1} + ) + + affiliation_list_names = [x["name"] for x in affiliation_list] + this_author["affiliation"] = ( + affiliation_list_names.index(this_affiliation) + 1 + ) + + author_list.append(this_author) + + content = { + "title": "", + "tags": citation["keywords"], + "authors": author_list, + "affiliations": affiliation_list, + "date": datetime.now().strftime("%Y-%m-%d"), + "bibliography": "paper.bib", + } + + with open("metadata.yml", "w", encoding="utf8") as output_file: + return yaml.dump(content, output_file) + + +if __name__ == "__main__": + main() diff --git a/docs/paper/metadata.yml b/docs/paper/metadata.yml new file mode 100644 index 00000000..588c9f35 --- /dev/null +++ b/docs/paper/metadata.yml @@ -0,0 +1,62 @@ +--- +title: '' +tags: +- MATLAB +- Octave +- brain imaging data structure +- MRI +- MEG +- EEG +- iEEG +- PET +- microscopy + +authors: +- name: Rémi Gau + orcid: 0000-0002-1535-9767 + affiliation: 1 +- name: Marco Barilari + orcid: 0000-0002-3313-3120 + affiliation: 1 +- name: Ceren Battal + orcid: 0000-0002-9844-7630 + affiliation: 1 +- name: Nikita Beliy +- name: Rotem Botvinik-Nezer +- name: Jeanne Caron-Guyon + orcid: 0000-0001-8681-5267 + affiliation: 1 +- name: Phillips Chrisophe + orcid: 0000-0002-4990-425X +- name: Tanguy Duval + orcid: 0000-0002-1228-5192 +- name: Guillaume Flandin + orcid: 0000-0003-0077-7859 +- name: Andrew Janke +- name: Michèle MacLean + orcid: 0000-0002-0174-9326 + affiliation: 2 +- name: Christopher Madan + orcid: 0000-0003-3228-6501 +- name: Henk Mutsaerts + orcid: 0000-0003-0894-0307 +- name: Guiomar Niso + orcid: 0000-0001-5872-8924 +- name: Martin Norgaard + orcid: 0000-0003-2131-5688 +- name: Robert Oostenveld + orcid: 0000-0002-1974-1293 +- name: Cyril Pernet + orcid: 0000-0003-4010-4632 +- name: Iqra Shahzad 
+ orcid: 0000-0002-8724-7668 + affiliation: 1 +- name: Michał Szczepanik + orcid: 0000-0002-4028-2087 +affiliations: +- name: Université catholique de Louvain + index: 1 +- name: Université de Montréal + index: 2 +date: '2022-10-21' +bibliography: paper.bib diff --git a/docs/paper/paper.Rmd b/docs/paper/paper.Rmd new file mode 100644 index 00000000..b1eac526 --- /dev/null +++ b/docs/paper/paper.Rmd @@ -0,0 +1,194 @@ +--- +title: "BIDS-matlab: How to read the data from the BIDS dataset" +tags: + - MATLAB + - Octave + - brain imaging data structure + - MRI + - MEG + - EEG + - iEEG + - PET + - microscopy +authors: + - name: Rémi Gau + orcid: 0000-0002-1535-9767 + affiliation: 1 + - name: Marco Barilari + orcid: 0000-0002-3313-3120 + affiliation: 1 + - name: Ceren Battal + orcid: 0000-0002-9844-7630 + affiliation: 1 + - name: Nikita Beliy + - name: Rotem Botvinik-Nezer + - name: Jeanne Caron-Guyon + orcid: 0000-0001-8681-5267 + affiliation: 1 + - name: Phillips Chrisophe + orcid: 0000-0002-4990-425X + - name: Tanguy Duval + orcid: 0000-0002-1228-5192 + - name: Guillaume Flandin + orcid: 0000-0003-0077-7859 + - name: Andrew Janke + - name: Michèle MacLean + orcid: 0000-0002-0174-9326 + affiliation: 2 + - name: Christopher Madan + orcid: 0000-0003-3228-6501 + - name: Henk Mutsaerts + orcid: 0000-0003-0894-0307 + - name: Guiomar Niso + orcid: 0000-0001-5872-8924 + - name: Martin Norgaard + orcid: 0000-0003-2131-5688 + - name: Robert Oostenveld + orcid: 0000-0002-1974-1293 + - name: Cyril Pernet + orcid: 0000-0003-4010-4632 + - name: Iqra Shahzad + orcid: 0000-0002-8724-7668 + affiliation: 1 + - name: Michał Szczepanik + orcid: 0000-0002-4028-2087 +affiliations: + - name: Université catholique de Louvain + index: 1 + - name: Université de Montréal + index: 2 +date: '2022-10-21' +bibliography: paper.bib +--- + +Word count: `r wordcountaddin::word_count()` + +# Summary + +The brain imaging data structure (BIDS) [@gorgolewski_brain_2016] +is becoming the standard for 
organizing "neuroimaging" data. + +BIDS-matlab aims at centralizing MATLAB/Octave tools to interact with BIDS datasets. + +# Statement of need + +The brain imaging data structure (BIDS) [@gorgolewski_brain_2016] +is becoming the standard for organizing data in neuroscience +for several types of imaging modalities +(magnetic resonance imaging, positron emission tomography, microscopy...). + +There is currently a Python package (pybids, @yarkoni_pybids_2019) +that "includes virtually any functionality that is likely to be of general use +when working with BIDS datasets (i.e., that is not specific to one narrow context)". + + + +There are also several MATLAB toolboxes (EEGlab `REF_NEEDED`, CONN toolbox `REF_NEEDED`) +that have some BIDS support, at least to the extent that they can ingest +raw and/or derivatives BIDS datasets. + + + +But there is no common library that, independently of the imaging modality: +- can index and query the data and metadata of BIDS datasets (whether they are raw or derivatives datasets) +- helps easily create filenames that follow a BIDS pattern +- can interact with the BIDS schema +- implements several transformers to help use the BIDS statistical models +- can work for both MATLAB and Octave. + +BIDS-matlab aims to fill that gap. + + +# Features + +At the time of writing this article, BIDS-matlab has the following main features. + +## Indexing and querying datasets + +When a dataset has several dozen participants with heterogeneous content +in terms of imaging modality or acquisition parameters and +with metadata for even a single data file potentially spread across several JSON files, +it can become difficult to fetch the right file or piece of metadata. + +BIDS-matlab can: + +- return the layout of a BIDS dataset (`bids.layout`), + +- perform queries on that layout to get information about the subjects, + sessions, runs, modalities, metadata... contained within that dataset (`bids.query`).
+ +BIDS-matlab is able to parse files in a BIDS dataset whether they follow the BIDS schema +or not, but requires that they follow typical BIDS filenaming patterns (like those +generated by fMRIPrep [@esteban_fmriprep_2019] - +see [next section](#creating-bids-valid-filenames) for more details). + +One notable extension was made to this filename parsing rule to accommodate filenames +that include a prefix before the standard BIDS filename +(for example `swuasub-01_task-rest_bold.nii` with the prefix `swua`) +as prefixing is such a common way to deal with derivatives filenames. + +## Creating BIDS valid filenames + +Most files in a BIDS dataset follow a standard naming convention that can be summarised as follows: + +```bash +datatype/sub-subLabel(_entity-label)*_suffix\.ext +``` + + +As of this writing, the BIDS specification supports: + +- 12 datatypes (for example: `func`, `anat`, `eeg`...) +- 28 different entities (for example: `ses`, `task`, `acq`...) +- 103 suffixes (for example: `bold`, `T1w`, `events`...) + +BIDS has a strict set of rules regarding: +- which suffix and entities are allowed for each datatype, +- which entities are allowed for raw and derivatives data, +- which entities are required or optional for each datatype and suffix, +- the order in which entities should appear in a filename. + +Creating BIDS valid filenames can therefore become a tedious task. +The BIDS-matlab `bids.File` class can help by relying on the BIDS schema +to validate filenames upon creation.
+ +## Transformers + + +## Dataset summaries + +- generate a human readable report of the content of a BIDS dataset containing + anatomical MRI, functional MRI, diffusion weighted imaging, field map data + (see `bids.report`) + +- create summary figures listing the number of files for each subject / session + and imaging modality (see `bids.diagnostic`) + +![Multisubject dataset content split by task.\label{fig:example}](../source/images/MultisubjectMultimodalFaceProcessing_splitby-task.png){ width=100% } + +- access and query the [BIDS schema](https://bids-specification.readthedocs.io/en/latest/schema.json) (`bids.schema`) + +- access, query and create basic transformations for the [BIDS statistical model](https://bids-standard.github.io/stats-models/) (`bids.Model` and `bids.transformers`) + +The behavior of this toolbox assumes that it is interacting with a valid BIDS +dataset that should have been validated using +[BIDS-validator](https://bids-standard.github.io/bids-validator/). + +## Usage + +According to a recent survey of 283 scientists [@paret_survey_2022], +7% of the 101 who had used BIDS before had also used BIDS-matlab. + +BIDS-matlab is currently used in ExploreASL [@mutsaerts_exploreasl_2020] and BIDSpm [@BIDSpm].
+ +## Performance + +## Development + +- linting +- testing +- pre-commit + +# Acknowledgements + +# References diff --git a/docs/paper/paper.Rproj b/docs/paper/paper.Rproj new file mode 100644 index 00000000..8e3c2ebc --- /dev/null +++ b/docs/paper/paper.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/docs/paper/paper.bib b/docs/paper/paper.bib new file mode 100644 index 00000000..670c9bb8 --- /dev/null +++ b/docs/paper/paper.bib @@ -0,0 +1,99 @@ + +@article{yarkoni_pybids_2019, + title = {{PyBIDS}: {Python} tools for {BIDS} datasets}, + volume = {4}, + issn = {2475-9066}, + shorttitle = {{PyBIDS}}, + url = {https://joss.theoj.org/papers/10.21105/joss.01294}, + doi = {10.21105/joss.01294}, + number = {40}, + urldate = {2022-10-19}, + journal = {Journal of Open Source Software}, + author = {Yarkoni, Tal and Markiewicz, Christopher and de la Vega, Alejandro and Gorgolewski, Krzysztof and Salo, Taylor and Halchenko, Yaroslav and McNamara, Quinten and DeStasio, Krista and Poline, Jean-Baptiste and Petrov, Dmitry and Hayot-Sasson, Valérie and Nielson, Dylan and Carlin, Johan and Kiar, Gregory and Whitaker, Kirstie and DuPre, Elizabeth and Wagner, Adina and Tirrell, Lee and Jas, Mainak and Hanke, Michael and Poldrack, Russell and Esteban, Oscar and Appelhoff, Stefan and Holdgraf, Chris and Staden, Isla and Thirion, Bertrand and Kleinschmidt, Dave and Lee, John and di Castello, Matteo and Notter, Michael and Blair, Ross}, + month = aug, + year = {2019}, + pages = {1294}, + file = {Full Text:/home/remi/Zotero/storage/HY9LKCHU/Yarkoni et al. 
- 2019 - PyBIDS Python tools for BIDS datasets.pdf:application/pdf}, +} + +@article{paret_survey_2022, + title = {Survey on {Open} {Science} {Practices} in {Functional} {Neuroimaging}}, + volume = {257}, + issn = {10538119}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S1053811922004256}, + doi = {10.1016/j.neuroimage.2022.119306}, + language = {en}, + urldate = {2022-10-19}, + journal = {NeuroImage}, + author = {Paret, Christian and Unverhau, Nike and Feingold, Franklin and Poldrack, Russell A. and Stirner, Madita and Schmahl, Christian and Sicorello, Maurizio}, + month = aug, + year = {2022}, + pages = {119306}, + file = {Full Text:/home/remi/Zotero/storage/QKUPZRG8/Paret et al. - 2022 - Survey on Open Science Practices in Functional Neu.pdf:application/pdf}, +} + +@article{gorgolewski_brain_2016, + title = {The brain imaging data structure, a format for organizing and describing outputs of neuroimaging experiments}, + volume = {3}, + issn = {2052-4463}, + url = {http://www.nature.com/articles/sdata201644}, + doi = {10.1038/sdata.2016.44}, + language = {en}, + number = {1}, + urldate = {2022-10-19}, + journal = {Scientific Data}, + author = {Gorgolewski, Krzysztof J. and Auer, Tibor and Calhoun, Vince D. and Craddock, R. Cameron and Das, Samir and Duff, Eugene P. and Flandin, Guillaume and Ghosh, Satrajit S. and Glatard, Tristan and Halchenko, Yaroslav O. and Handwerker, Daniel A. and Hanke, Michael and Keator, David and Li, Xiangrui and Michael, Zachary and Maumet, Camille and Nichols, B. Nolan and Nichols, Thomas E. and Pellman, John and Poline, Jean-Baptiste and Rokem, Ariel and Schaefer, Gunnar and Sochat, Vanessa and Triplett, William and Turner, Jessica A. and Varoquaux, Gaël and Poldrack, Russell A.}, + month = dec, + year = {2016}, + pages = {160044}, + file = {Full Text:/home/remi/Zotero/storage/GS7EW8E9/Gorgolewski et al. 
- 2016 - The brain imaging data structure, a format for org.pdf:application/pdf}, +} + +@article{mutsaerts_exploreasl_2020, + title = {{ExploreASL}: {An} image processing pipeline for multi-center {ASL} perfusion {MRI} studies}, + volume = {219}, + issn = {10538119}, + shorttitle = {{ExploreASL}}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S1053811920305176}, + doi = {10.1016/j.neuroimage.2020.117031}, + language = {en}, + urldate = {2022-10-19}, + journal = {NeuroImage}, + author = {Mutsaerts, Henk J.M.M. and Petr, Jan and Groot, Paul and Vandemaele, Pieter and Ingala, Silvia and Robertson, Andrew D. and Václavů, Lena and Groote, Inge and Kuijf, Hugo and Zelaya, Fernando and O’Daly, Owen and Hilal, Saima and Wink, Alle Meije and Kant, Ilse and Caan, Matthan W.A. and Morgan, Catherine and de Bresser, Jeroen and Lysvik, Elisabeth and Schrantee, Anouk and Bjørnebekk, Astrid and Clement, Patricia and Shirzadi, Zahra and Kuijer, Joost P.A. and Wottschel, Viktor and Anazodo, Udunna C. and Pajkrt, Dasja and Richard, Edo and Bokkers, Reinoud P.H. and Reneman, Liesbeth and Masellis, Mario and Günther, Matthias and MacIntosh, Bradley J. and Achten, Eric and Chappell, Michael A. and van Osch, Matthias J.P. and Golay, Xavier and Thomas, David L. and De Vita, Enrico and Bjørnerud, Atle and Nederveen, Aart and Hendrikse, Jeroen and Asllani, Iris and Barkhof, Frederik}, + month = oct, + year = {2020}, + pages = {117031}, + file = {Full Text:/home/remi/Zotero/storage/T3TYKDUV/Mutsaerts et al. 
- 2020 - ExploreASL An image processing pipeline for multi.pdf:application/pdf}, +} + +@article{esteban_fmriprep_2019, + title = {{fMRIPrep}: a robust preprocessing pipeline for functional {MRI}}, + volume = {16}, + issn = {1548-7091, 1548-7105}, + shorttitle = {{fMRIPrep}}, + url = {http://www.nature.com/articles/s41592-018-0235-4}, + doi = {10.1038/s41592-018-0235-4}, + language = {en}, + number = {1}, + urldate = {2022-10-19}, + journal = {Nature Methods}, + author = {Esteban, Oscar and Markiewicz, Christopher J. and Blair, Ross W. and Moodie, Craig A. and Isik, A. Ilkay and Erramuzpe, Asier and Kent, James D. and Goncalves, Mathias and DuPre, Elizabeth and Snyder, Madeleine and Oya, Hiroyuki and Ghosh, Satrajit S. and Wright, Jessey and Durnez, Joke and Poldrack, Russell A. and Gorgolewski, Krzysztof J.}, + month = jan, + year = {2019}, + pages = {111--116}, + file = {Submitted Version:/home/remi/Zotero/storage/EJRS4M2V/Esteban et al. - 2019 - fMRIPrep a robust preprocessing pipeline for func.pdf:application/pdf}, +} + +@misc{BIDSpm, + title = {{CPP} {SPM}}, + copyright = {GNU General Public License v3.0 only, Open Access}, + url = {https://zenodo.org/record/6873638}, + abstract = {CPP\_SPM is a set pipelines and tools for Octave/MATLAB to process and analyze BIDS data sets using SPM.}, + urldate = {2022-10-19}, + publisher = {Zenodo}, + author = {Gau, Rémi and Barilari, Marco and Battal, Ceren and Rezk, Mohamed and Collignon, Olivier and Gurtubay, Ane and Falagiarda, Federica and MacLean, Michèle and Cerpelloni, Filippo and Shahzad, Iqra and Nunes, Márcia and Caron-Guyon, Jeanne and Chouinard-Leclaire, Christine}, + month = jul, + year = {2022}, + doi = {10.5281/ZENODO.6873638}, + keywords = {automated pipeline, BIDS, brain imaging data structure, MATLAB, MRI, neuroimaging, Octave, SPM}, +} diff --git a/docs/paper/requirements.txt b/docs/paper/requirements.txt new file mode 100644 index 00000000..58d36084 --- /dev/null +++ b/docs/paper/requirements.txt @@ -0,0 
+1 @@ +ruamel.yaml