study_da

`create(path_config_scan='config_scan.yaml', force_overwrite=False, dic_parameter_all_gen=None, dic_parameter_all_gen_naming=None, add_prefix_to_folder_names=False)`

Create a study based on the configuration file.

Parameters:

Name	Type	Description	Default
`path_config_scan`	`str`	Path to the configuration file for the scan. Defaults to "config_scan.yaml".	`'config_scan.yaml'`
`force_overwrite`	`bool`	Flag to force overwrite the study. Defaults to False.	`False`
`dic_parameter_all_gen`	`Optional[dict[str, dict[str, Any]]]`	Dictionary of parameters for the scan, if not provided through the scan config. Defaults to None.	`None`
`dic_parameter_all_gen_naming`	`Optional[dict[str, dict[str, Any]]]`	Dictionary of parameters for the naming of the scan subfolders, if not provided through the scan config. Defaults to None.	`None`
`add_prefix_to_folder_names`	`bool`	Whether to add a prefix to the folder names. Defaults to False.	`False`

Returns:

Type	Description
`tuple[str, str]`	The path to the tree file and the name of the main configuration file.

Source code in study_da/study_da.py

def create(
    path_config_scan: str = "config_scan.yaml",
    force_overwrite: bool = False,
    dic_parameter_all_gen: Optional[dict[str, dict[str, Any]]] = None,
    dic_parameter_all_gen_naming: Optional[dict[str, dict[str, Any]]] = None,
    add_prefix_to_folder_names: bool = False,
) -> tuple[str, str]:
    """
    Build a study from a scan configuration file.

    A ``GenerateScan`` object is instantiated from the configuration file and asked to create
    the study. The two values needed afterwards for the submission are then returned.

    Args:
        path_config_scan (str, optional): Path to the configuration file for the scan.
            Defaults to "config_scan.yaml".
        force_overwrite (bool, optional): Flag to force overwrite the study. Defaults to False.
        dic_parameter_all_gen (Optional[dict[str, dict[str, Any]]], optional): Dictionary of
            parameters for the scan, if not provided through the scan config. Defaults to None.
        dic_parameter_all_gen_naming (Optional[dict[str, dict[str, Any]]], optional): Dictionary
            of parameters for the naming of the scan subfolders, if not provided through the
            scan config. Defaults to None.
        add_prefix_to_folder_names (bool, optional): Whether to add a prefix to the folder names.
            Defaults to False.

    Returns:
        tuple[str, str]: The path to the tree file and the name of the main configuration file
            (read from the "dependencies" section of the scan configuration).
    """
    logging.info(f"Create study from configuration file: {path_config_scan}")

    # All creation options are forwarded untouched to the scan generator
    creation_options = {
        "force_overwrite": force_overwrite,
        "dic_parameter_all_gen": dic_parameter_all_gen,
        "dic_parameter_all_gen_naming": dic_parameter_all_gen_naming,
        "add_prefix_to_folder_names": add_prefix_to_folder_names,
    }
    scan_generator = GenerateScan(path_config=path_config_scan)
    scan_generator.create_study(**creation_options)

    # Tree path and main configuration name are what the submission step needs
    return (
        scan_generator.path_tree,
        scan_generator.config["dependencies"]["main_configuration"],
    )

`create_single_job(name_main_configuration, name_executable_generation_1, name_executable_generation_2=None, name_executable_generation_3=None, name_study='single_job_study', force_overwrite=False)`

Create a single job study (not a parametric scan) with the specified configuration and executables. Limited to three generations.

Parameters:

Name	Type	Description	Default
`name_main_configuration`	`str`	The name of the main configuration file for the study.	required
`name_executable_generation_1`	`str`	The name of the executable for the first generation.	required
`name_executable_generation_2`	`Optional[str]`	The name of the executable for the second generation. Defaults to None.	`None`
`name_executable_generation_3`	`Optional[str]`	The name of the executable for the third generation. Defaults to None.	`None`
`name_study`	`str`	The name of the study. Defaults to "single_job_study".	`'single_job_study'`
`force_overwrite`	`bool`	Whether to force overwrite existing files. Defaults to False.	`False`

Returns:

Type	Description
`str`	The path to the tree file.

Source code in study_da/study_da.py

def create_single_job(
    name_main_configuration: str,
    name_executable_generation_1: str,
    name_executable_generation_2: Optional[str] = None,
    name_executable_generation_3: Optional[str] = None,
    name_study: str = "single_job_study",
    force_overwrite: bool = False,
) -> str:
    """
    Build a study made of a single job (i.e. not a parametric scan) from a main configuration
    and up to three generation executables.

    The scan dictionary is assembled in memory and handed to ``GenerateScan``; no scan
    configuration file is read.

    Args:
        name_main_configuration (str): The name of the main configuration file for the study.
        name_executable_generation_1 (str): The name of the executable for the first generation.
        name_executable_generation_2 (Optional[str], optional): The name of the executable for
            the second generation. The generation is omitted when None. Defaults to None.
        name_executable_generation_3 (Optional[str], optional): The name of the executable for
            the third generation. The generation is omitted when None. Defaults to None.
        name_study (str, optional): The name of the study. Defaults to "single_job_study".
        force_overwrite (bool, optional): Whether to force overwrite existing files.
            Defaults to False.

    Returns:
        str: The path to the tree file.
    """
    # The first generation is mandatory; the following ones only exist if an executable is given
    structure = {"generation_1": {"executable": name_executable_generation_1}}
    optional_executables = {
        "generation_2": name_executable_generation_2,
        "generation_3": name_executable_generation_3,
    }
    for generation, executable in optional_executables.items():
        if executable is not None:
            structure[generation] = {"executable": executable}

    # Assemble the scan dictionary
    dic_scan = {
        "name": name_study,
        "dependencies": {"main_configuration": name_main_configuration},
        "structure": structure,
    }

    # Create the study from the in-memory scan dictionary
    logging.info(f"Create single job study: {name_study}")
    single_job_study = GenerateScan(dic_scan=dic_scan)
    single_job_study.create_study(force_overwrite=force_overwrite)

    return single_job_study.path_tree

`submit(path_tree, path_python_environment='', path_python_environment_container='', path_container_image=None, force_configure=False, dic_config_jobs=None, one_generation_at_a_time=False, keep_submit_until_done=False, wait_time=30, max_try=100, force_submit=False, dic_additional_commands_per_gen=None, dic_dependencies_per_gen=None, dic_copy_back_per_gen=None, name_config='config.yaml')`

Submits the jobs to the cluster. Note that copying back large files (e.g. json colliders) can trigger a throttling mechanism in AFS.

The following arguments are only used for HTC job submission: `dic_additional_commands_per_gen`, `dic_dependencies_per_gen`, `dic_copy_back_per_gen` and `name_config`.

Parameters:

Name	Type	Description	Default
`path_tree`	`str`	The path to the tree file.	required
`path_python_environment`	`str`	The path to the python environment. Defaults to "".	`''`
`path_python_environment_container`	`str`	The path to the python environment in the container. Defaults to "".	`''`
`path_container_image`	`Optional[str]`	The path to the container image. Defaults to None.	`None`
`force_configure`	`bool`	Whether to force reconfiguration. Defaults to False.	`False`
`dic_config_jobs`	`Optional[dict[str, dict[str, Any]]]`	A dictionary containing the configuration of the jobs. Defaults to None.	`None`
`one_generation_at_a_time`	`bool`	Whether to submit one full generation at a time. Defaults to False.	`False`
`keep_submit_until_done`	`bool`	Whether to keep submitting jobs until all jobs are finished or failed. Defaults to False.	`False`
`max_try`	`int`	The maximum number of tries to submit a job. Defaults to 100.	`100`
`force_submit`	`bool`	If True, jobs are resubmitted even though they failed. Defaults to False.	`False`
`wait_time`	`float`	The wait time between submissions in minutes. Defaults to 30.	`30`
`dic_additional_commands_per_gen`	`Optional[dict[int, str]]`	Additional commands per generation. Defaults to None.	`None`
`dic_dependencies_per_gen`	`Optional[dict[int, list[str]]]`	Dependencies per generation. Only used when doing a HTC submission. Defaults to None.	`None`
`dic_copy_back_per_gen`	`Optional[dict[int, dict[str, bool]]]`	A dictionary containing the files to copy back per generation. Accepted keys are "parquet", "yaml", "txt", "json", "zip" and "all". Defaults to None, corresponding to copying back only "light" files, i.e. parquet, yaml and txt.	`None`
`name_config`	`str`	The name of the configuration file for the study. Defaults to "config.yaml".	`'config.yaml'`

Returns:

Type	Description
`None`	None

Source code in study_da/study_da.py

def submit(
    path_tree: str,
    path_python_environment: str = "",
    path_python_environment_container: str = "",
    path_container_image: Optional[str] = None,
    force_configure: bool = False,
    dic_config_jobs: Optional[dict[str, dict[str, Any]]] = None,
    one_generation_at_a_time: bool = False,
    keep_submit_until_done: bool = False,
    wait_time: float = 30,
    max_try: int = 100,
    force_submit: bool = False,
    dic_additional_commands_per_gen: Optional[dict[int, str]] = None,
    dic_dependencies_per_gen: Optional[dict[int, list[str]]] = None,
    dic_copy_back_per_gen: Optional[dict[int, dict[str, bool]]] = None,
    name_config: str = "config.yaml",
) -> None:
    """
    Configure and submit the jobs of an existing study to the cluster.

    A ``SubmitScan`` object is built from the tree file, the jobs are configured, and the jobs
    are then either submitted once or repeatedly (see ``keep_submit_until_done``). Note that
    copying back large files (e.g. json colliders) can trigger a throttling mechanism in AFS.

    The following arguments are only used for HTC jobs submission:
    - dic_additional_commands_per_gen
    - dic_dependencies_per_gen
    - dic_copy_back_per_gen
    - name_config

    Args:
        path_tree (str): The path to the tree file.
        path_python_environment (str, optional): The path to the python environment.
            Defaults to "".
        path_python_environment_container (str, optional): The path to the python environment
            in the container. Defaults to "".
        path_container_image (Optional[str], optional): The path to the container image.
            Defaults to None.
        force_configure (bool, optional): Whether to force reconfiguration. Defaults to False.
        dic_config_jobs (Optional[dict[str, dict[str, Any]]], optional): A dictionary containing
            the configuration of the jobs. Defaults to None.
        one_generation_at_a_time (bool, optional): Whether to submit one full generation at a
            time. Defaults to False.
        keep_submit_until_done (bool, optional): Whether to keep submitting jobs until all jobs
            are finished or failed. Defaults to False.
        wait_time (float, optional): The wait time between submissions in minutes. Only
            forwarded when keep_submit_until_done is True. Defaults to 30.
        max_try (int, optional): The maximum number of tries to submit a job. Only forwarded
            when keep_submit_until_done is True. Defaults to 100.
        force_submit (bool, optional): If True, jobs are resubmitted even though they failed.
            Defaults to False.
        dic_additional_commands_per_gen (Optional[dict[int, str]], optional): Additional
            commands per generation. Defaults to None.
        dic_dependencies_per_gen (Optional[dict[int, list[str]]], optional): Dependencies per
            generation. Only used when doing a HTC submission. Defaults to None.
        dic_copy_back_per_gen (Optional[dict[int, dict[str, bool]]], optional): A dictionary
            containing the files to copy back per generation. Accepted keys are "parquet",
            "yaml", "txt", "json", "zip" and "all". Defaults to None, corresponding to copying
            back only "light" files, i.e. parquet, yaml and txt.
        name_config (str, optional): The name of the configuration file for the study.
            Defaults to "config.yaml".

    Returns:
        None
    """
    # Instantiate the submission object (does not affect an already existing study)
    scan_submitter = SubmitScan(
        path_tree=path_tree,
        path_python_environment=path_python_environment,
        path_python_environment_container=path_python_environment_container,
        path_container_image=path_container_image,
    )

    # Configure the jobs (will only configure if not already done)
    scan_submitter.configure_jobs(
        force_configure=force_configure,
        dic_config_jobs=dic_config_jobs,
    )

    # Keyword arguments common to the one-shot and the repeated submission
    submission_kwargs = {
        "one_generation_at_a_time": one_generation_at_a_time,
        "dic_additional_commands_per_gen": dic_additional_commands_per_gen,
        "dic_dependencies_per_gen": dic_dependencies_per_gen,
        "dic_copy_back_per_gen": dic_copy_back_per_gen,
        "name_config": name_config,
        "force_submit": force_submit,
    }

    # One-shot submission (only submits the jobs that are not already submitted or finished)
    if not keep_submit_until_done:
        scan_submitter.submit(**submission_kwargs)
        return

    # Repeated submission: wait_time and max_try only apply to this mode
    scan_submitter.keep_submit_until_done(
        wait_time=wait_time,
        max_try=max_try,
        **submission_kwargs,
    )