Skip to content

postprocess

aggregate_output_data(path_tree, l_group_by_parameters, function_to_aggregate=min, generation_of_interest=2, name_output='output_particles.parquet', write_output=True, path_output=None, only_keep_lost_particles=True, dic_parameters_of_interest=None, l_parameters_to_keep=None, name_template_parameters='parameters_lhc.yaml', path_template_parameters=None, force_overwrite=False)

Aggregates output data from simulation files.

Parameters:

Name Type Description Default
path_tree str

The path to the tree file.

required
l_group_by_parameters list

List of parameters to group by.

required
function_to_aggregate callable

Function to aggregate the grouped data.

min
generation_of_interest int

The generation of interest. Defaults to 2.

2
name_output str

The name of the output file. Defaults to "output_particles.parquet".

'output_particles.parquet'
write_output bool

Flag to indicate if the output should be written to a file. Defaults to True.

True
path_output str

The path to the output file. If not provided, the default output file will be in the study folder as 'da.parquet'. Defaults to None.

None
only_keep_lost_particles bool

Flag to indicate if only lost particles should be kept. Defaults to True.

True
dic_parameters_of_interest dict

Dictionary of parameters of interest. Defaults to None.

None
l_parameters_to_keep list

List of parameters to keep. Defaults to None.

None
name_template_parameters str

The name of the template parameters file associating each parameter to a list of keys. Defaults to "parameters_lhc.yaml", which is already contained in the study-da package, and includes the main usual parameters.

'parameters_lhc.yaml'
path_template_parameters str

The path to the template parameters file. Must be provided if name_template_parameters does not refer to a template already contained in study-da. Defaults to None.

None
force_overwrite bool

Flag to indicate if the output file should be overwritten if it already exists. Defaults to False.

False

Returns:

Type Description
DataFrame

pd.DataFrame: The final aggregated DataFrame.

Source code in study_da/postprocess/postprocess.py
def aggregate_output_data(
    path_tree: str,
    l_group_by_parameters: List[str],
    function_to_aggregate: Callable = min,
    generation_of_interest: int = 2,
    name_output: str = "output_particles.parquet",
    write_output: bool = True,
    path_output: Optional[str] = None,
    only_keep_lost_particles: bool = True,
    dic_parameters_of_interest: Optional[Dict[str, List[str]]] = None,
    l_parameters_to_keep: Optional[List[str]] = None,
    name_template_parameters: str = "parameters_lhc.yaml",
    path_template_parameters: Optional[str] = None,
    force_overwrite: bool = False,
) -> pd.DataFrame:
    """
    Aggregates output data from simulation files.

    If the target output file already exists and ``force_overwrite`` is False, the user is
    prompted on standard input; any answer other than "y" (case-insensitive) makes the
    function return the content of the existing file without recomputing anything.

    Args:
        path_tree (str): The path to the tree file.
        l_group_by_parameters (list): List of parameters to group by.
        function_to_aggregate (callable, optional): Function to aggregate the grouped data.
            Defaults to min.
        generation_of_interest (int, optional): The generation of interest. Defaults to 2.
        name_output (str, optional): The name of the output file, forwarded to
            ``get_particles_data``. Defaults to "output_particles.parquet".
        write_output (bool, optional): Flag to indicate if the output should be written to a file.
            Defaults to True.
        path_output (str, optional): The path to the output file. If not provided, the default
            output file will be in the study folder as 'da.parquet'. Defaults to None.
        only_keep_lost_particles (bool, optional): Flag to indicate if only lost particles should be
            kept. Defaults to True.
        dic_parameters_of_interest (dict, optional): Dictionary of parameters of interest. If
            None, it is loaded from the template parameters file. Defaults to None.
        l_parameters_to_keep (list, optional): List of parameters to keep. Defaults to None.
        name_template_parameters (str, optional): The name of the template parameters file
            associating each parameter to a list of keys. Only used when neither
            dic_parameters_of_interest nor path_template_parameters is provided. Defaults to
            "parameters_lhc.yaml", which is already contained in the study-da package, and
            includes the main usual parameters.
        path_template_parameters (str, optional): The path to the template parameters file. Must
            be provided if name_template_parameters does not refer to a template already
            contained in study-da. Defaults to None.
        force_overwrite (bool, optional): Flag to indicate if the output file should be overwritten
            if it already exists. Defaults to False.

    Returns:
        pd.DataFrame: The final aggregated DataFrame (or the content of the already existing
            output file if the user declined to overwrite it).

    Raises:
        ValueError: If dic_parameters_of_interest, path_template_parameters and
            name_template_parameters are all None.
    """
    # Remember whether the caller explicitly chose an output path *before* the default is
    # filled in; otherwise the "path provided but not written" warning at the end would be
    # emitted on every call with write_output=False.
    path_output_provided = path_output is not None

    dic_tree, _ = load_dic_from_path(path_tree)
    absolute_path_study = dic_tree["absolute_path"]
    if path_output is None:
        path_output = os.path.join(absolute_path_study, "da.parquet")

    # Check if the output already exists and ask for confirmation to overwrite.
    # NOTE(review): this prompt is also shown when write_output is False, in which case
    # nothing would be overwritten anyway — confirm whether that is intended.
    if os.path.exists(path_output) and not force_overwrite:
        input_user = input(
            f"The output file {path_output} already exists. Do you want to overwrite it? (y/n) "
        )
        if input_user.lower() != "y":
            logging.warning("Output file not overwritten")
            return pd.read_parquet(path_output)

    logging.info("Analysis of output simulation files started")

    dic_all_jobs = ConfigJobs(
        dic_tree, starting_depth=-len(Path(path_tree).parts) + 2
    ).find_all_jobs()

    l_df_sim = get_particles_data(
        dic_all_jobs, absolute_path_study, generation_of_interest, name_output
    )

    # Resolve the parameters of interest: an explicit dictionary wins, then an explicit
    # template path, then the template shipped next to this module under "configs".
    default_path_template_parameters = False
    if dic_parameters_of_interest is None:
        if path_template_parameters is not None:
            logging.info("Loading parameters of interest from the provided configuration file")
        else:
            if name_template_parameters is None:
                raise ValueError(
                    "No template configuration file provided for the parameters of interest"
                )
            logging.info("Loading parameters of interest from the template configuration file")
            path_template_parameters = os.path.join(
                os.path.dirname(inspect.getfile(aggregate_output_data)),
                "configs",
                name_template_parameters,
            )
            default_path_template_parameters = True
        dic_parameters_of_interest, _ = load_dic_from_path(path_template_parameters)

    l_df_output = add_parameters_from_config(
        l_df_sim, dic_parameters_of_interest, default_path_template_parameters
    )

    df_final = merge_and_group_by_parameters_of_interest(
        l_df_output,
        l_group_by_parameters,
        only_keep_lost_particles,
        l_parameters_to_keep,
        function_to_aggregate,
    )

    # Fix the LHC version type
    df_final = fix_LHC_version(df_final)

    if write_output:
        df_final.to_parquet(path_output)
    elif path_output_provided:
        # Only warn when the caller actually asked for a specific output location.
        logging.warning("Output path provided but write_output set to False, no output saved")

    logging.info("Final dataframe for current set of simulations: %s", df_final)
    return df_final