Source code for fino2py.reshaping.generate_protocol_averages

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Generate averages for each section of the experimental protocol from already ingested finometer data.

This module provides a function to calculate averages for each section of the experimental protocol. 
The resulting averages are concatenated into a single row, allowing for easy merging of participants into a single dataframe. 
Optionally, the function can save the reshaped data as a CSV file.

Notes
-----
- The `frame` parameter should be a pandas DataFrame object containing data produced by the `read_raw_finometer_data` function.
- The `id` parameter should be a string representing the participant ID (also produced by the `read_raw_finometer_data` function).
- The `times` parameter should be a dictionary of tuples representing the time periods for calculating averages. There are functions in the times section to help format times.

Example
-------
# Import averaged finometer data
frame, id = read_raw_finometer_data('/path/to/folder')

# Define the time periods for calculating averages
times = {
    'baseline': ('00:03:00', '00:10:00'),
    'task': ('00:10:00', '00:19:00'),
    ...
}

# Generate protocol averages (save_csv is the folder the CSV file is written to)
averages = generate_protocol_averages(frame, id, times=times, save_csv='/path/to/folder')
'''




from ..dependencies import pl, pd, reduce
from .create_chunk import create_chunk


def generate_protocol_averages(frame, id, times=None, save_csv=None):
    '''Build a single-row DataFrame of per-section values for one participant.

    One chunk is created with `create_chunk` for every time period in `times`,
    and the chunks are outer-merged on the "Participant ID" column so that
    participants can later be combined into a single dataframe.

    Parameters
    ----------
    frame : pandas.DataFrame
        The DataFrame containing the averaged finometer data, produced by the
        read_raw_finometer_data function.
    id : str
        The participant ID.
    times : dict
        A dictionary of (start, end) pairs, with the keys being the names of
        the time periods. Start and end are compared with `<`, so both values
        of a pair must use the same comparable format.
    save_csv : bool, str or path-like, optional
        Where to save the result. A string or path is used as the folder the
        file "<id> protocol_averages.csv" is written to; True writes it to the
        current working directory. Any falsy value (the default) saves nothing.

    Returns
    -------
    pandas.DataFrame
        The merged chunks, indexed by "Participant ID".

    Raises
    ------
    TypeError
        If frame is not a pandas.DataFrame object
        If id is not a string
    ValueError
        If times is not provided as a dictionary with at least one key-value pair
        If there are not enough times provided for a given time period
        If there are too many times provided for a given time period
        If no time period has a start time earlier than its end time

    Warns
    -----
    UserWarning
        For every time period that is skipped because its start time is not
        earlier than its end time.
    '''
    # Function-local import so the module-level imports stay untouched.
    import warnings

    # check if frame is a pandas.DataFrame object
    if not isinstance(frame, pd.DataFrame):
        raise TypeError('''
        frame must be a pandas.DataFrame object, produced by the read_raw_finometer_data function, 
        have you run the read_raw_finometer_data function on the data?''')

    if not isinstance(id, str):
        raise TypeError('id must be a string')

    # Reject non-dict values up front instead of failing later on `.items()`.
    if not isinstance(times, dict) or not times:
        raise ValueError("times must be a dictionary and at least one key-value pair must be provided.")

    # One dataframe per protocol section, in the order the periods were given
    chunks = []

    for name, period in times.items():
        if len(period) < 2:
            raise ValueError(f"There are not enough times provided for the {name}.")
        if len(period) > 2:
            raise ValueError(f"There are too many times provided for the {name}.")

        start, end = period[0], period[1]
        if start < end:
            chunks.append(create_chunk(frame, id, name, start, end))
        else:
            # Such periods were always left out of the result; say so rather
            # than dropping them silently.
            warnings.warn(
                f"Skipping the {name}: start time {start!r} is not earlier than end time {end!r}.",
                stacklevel=2,
            )

    # reduce() on an empty list fails with an unhelpful TypeError.
    if not chunks:
        raise ValueError("None of the time periods in times has a start time earlier than its end time.")

    data_merge = reduce(lambda left, right: pd.merge(left, right, on=["Participant ID"], how="outer"), chunks)
    data_merge.set_index('Participant ID', inplace=True)

    if save_csv:
        # True has no folder attached to it, so fall back to the current
        # working directory; anything else is treated as the target folder.
        path = pl.Path(".") if isinstance(save_csv, bool) else pl.Path(save_csv)
        out_file = path / f"{id} protocol_averages.csv"
        data_merge.to_csv(out_file)
        print(f"Saved {out_file.name} to {path}")

    return data_merge