Source code for fino2py.reshaping.nova_concat

#!/usr/bin/env python
# -*- coding: utf-8 -*-


from ..dependencies import pd, Union, Dict 
from .nova_minute_by_minute import nova_minute_by_minute

'''
This function allows us to concatenate the minute-by-minute data for each participant into a single DataFrame so that we can perform group-level analysis.

In future when we have a means of creating protocol 'chunk' averages, we will also be able to use this function to concatenate the chunk averages for each participant into a single DataFrame for group-level analysis. 
But for now this is just for minute-by-minute data.

Notes
-----
- The `data_dict` parameter should be a dictionary with participant IDs as keys and minute-by-minute dataframes as values.
- The `data_dict` should be created using the `read_raw_nova_data` function.
- The Participant ID column is added at the beginning of the DataFrame to allow for sample-level analysis, and to allow for easy merging of various datasets (e.g. demographics data and/or survey data from Qualtrics or Pavlovia).

Example
-------
# Import minute-by-minute data for each participant

data_dict: Dict[str, pd.DataFrame] = {}

for sub_folder in project_folder.iterdir():
    try:
        ID, df = read_raw_nova_data(sub_folder, interval='1s', save_csv=True)

        data_dict[ID] = df

    except Exception as e:
        print(f'Error processing folder {sub_folder.name}: {e}')

concatenated_df = nova_concat(data_dict)
'''

def nova_concat(data_dict: Dict) -> pd.DataFrame:
    '''
    Concatenate the minute-by-minute data for each participant into a single DataFrame.

    Every value in `data_dict` is passed, together with its key, to
    `nova_minute_by_minute`. The resulting frames are aligned to a common set
    of columns and stacked row-wise with `pd.concat`.

    Parameters
    ----------
    data_dict : Dict
        A dictionary with Participant IDs as keys and per-participant
        dataframes as values. It is expected to be built from the output of
        `read_raw_nova_data` (not verified here).

    Returns
    -------
    pd.DataFrame
        A concatenated DataFrame containing the minute-by-minute data of every
        participant that could be processed. Columns start with those of the
        widest per-participant frame, followed by any columns that only occur
        in other frames; cells a participant has no data for are NaN.
        An empty DataFrame is returned when `data_dict` is empty or no
        participant could be processed.

    Notes
    -----
    - A participant whose data raises an exception inside
      `nova_minute_by_minute` is reported on stdout and skipped, so a single
      bad dataset does not abort the whole concatenation.
    - The Participant ID column is presumably added by
      `nova_minute_by_minute`; confirm against that function.

    Example
    -------
    data_dict = {ID: df for ID, df in participant_data}
    concatenated_df = nova_concat(data_dict)
    '''
    frames = []
    for df_id, df in data_dict.items():
        try:
            frames.append(nova_minute_by_minute(df, df_id))
        except Exception as e:
            # Best-effort: report the failing participant and carry on.
            print(f'Error processing folder {df_id}: {e}')

    # Nothing to concatenate: max() and pd.concat would both raise on an
    # empty list, so hand back an empty frame instead.
    if not frames:
        return pd.DataFrame()

    # The widest dataframe defines the leading column order.
    widest = max(frames, key=lambda frame: frame.shape[1])
    target_columns = widest.columns

    # Collect columns that exist only in narrower frames so they are kept
    # rather than silently dropped by the reindex below.
    known = set(target_columns)
    extras = []
    for frame in frames:
        for column in frame.columns:
            if column not in known:
                known.add(column)
                extras.append(column)
    if extras:
        target_columns = target_columns.append(pd.Index(extras))

    # Add the missing columns (filled with NaN) to each dataframe.
    frames = [frame.reindex(target_columns, axis=1) for frame in frames]

    return pd.concat(frames)