def conversion_to_odml_table_descriptor(full_elabbook_csv, session_number):
    """
    Build a long-format odML descriptor table for one session of an elabbook export.

    Rows whose ``ses_number`` equals ``session_number`` are selected, and every
    column from ``ethical_protocol_id`` up to the last column is melted into one
    row per (record, property) pair.

    Args:
        full_elabbook_csv: path to the full csv file
        session_number: value of the ``ses_number`` column identifying the
            session to create the descriptor table for
    Returns:
        pandas.DataFrame with the columns ``record_id``, ``Property name``,
        ``value`` and ``Type``, where ``Type`` holds the name of the Python
        type of each entry in ``value``. The same table is also printed to
        stdout.
    Raises:
        KeyError: if ``ses_number``, ``ethical_protocol_id`` or ``record_id``
            is not a column of the csv file
    """
    df = pd.read_csv(full_elabbook_csv)

    # Keep only the rows belonging to the requested session
    df_ses = df.loc[df['ses_number'] == session_number]

    # Everything from 'ethical_protocol_id' to the last column is treated as
    # a property of the record
    first_property = df_ses.columns.get_loc('ethical_protocol_id')
    cols_to_melt = df_ses.columns[first_property:]

    df_melted = df_ses.melt(id_vars='record_id', value_vars=cols_to_melt)
    df_melted = df_melted.rename(columns={'variable': 'Property name'})

    # str(type(x)) looks like "<class 'float'>"; the text between the quotes
    # is the bare type name
    value_types = df_melted['value'].apply(type).astype(str)
    value_types = value_types.str.split("'").str[1]

    # Add the Type value corresponding to the type of the value
    df_melted['Type'] = value_types

    # Console output is retained for existing users of this helper
    print(df_melted.to_string())

    # Hand the table back so callers can inspect or store it instead of
    # having to parse the printed output
    return df_melted