Skip to content

study_da

create(path_config_scan='config_scan.yaml', force_overwrite=False, dic_parameter_all_gen=None, dic_parameter_all_gen_naming=None, add_prefix_to_folder_names=False)

Create a study based on the configuration file.

Parameters:

Name Type Description Default
path_config_scan str

Path to the configuration file for the scan. Defaults to "config_scan.yaml".

'config_scan.yaml'
force_overwrite bool

Flag to force overwrite the study. Defaults to False.

False
dic_parameter_all_gen Optional[dict[str, dict[str, Any]]]

Dictionary of parameters for the scan, if not provided through the scan config. Defaults to None.

None
dic_parameter_all_gen_naming Optional[dict[str, dict[str, Any]]]

Dictionary of parameters for the naming of the scan subfolders, if not provided through the scan config. Defaults to None.

None
add_prefix_to_folder_names bool

Whether to add a prefix to the folder names. Defaults to False.

False

Returns:

Type Description
tuple[str, str]

tuple[str, str]: The path to the tree file and the name of the main configuration file.

Source code in study_da/study_da.py
def create(
    path_config_scan: str = "config_scan.yaml",
    force_overwrite: bool = False,
    dic_parameter_all_gen: Optional[dict[str, dict[str, Any]]] = None,
    dic_parameter_all_gen_naming: Optional[dict[str, dict[str, Any]]] = None,
    add_prefix_to_folder_names: bool = False,
) -> tuple[str, str]:
    """
    Generate a study from a scan configuration file.

    A ``GenerateScan`` is built from ``path_config_scan`` and asked to create the study; the
    two values needed afterwards for the submission are then returned.

    Args:
        path_config_scan (str, optional): Path to the configuration file for the scan.
            Defaults to "config_scan.yaml".
        force_overwrite (bool, optional): Flag to force overwrite the study. Defaults to False.
        dic_parameter_all_gen (Optional[dict[str, dict[str, Any]]], optional): Dictionary of
            parameters for the scan, if not provided through the scan config. Defaults to None.
        dic_parameter_all_gen_naming (Optional[dict[str, dict[str, Any]]], optional): Dictionary
            of parameters for the naming of the scan subfolders, if not provided through the
            scan config. Defaults to None.
        add_prefix_to_folder_names (bool, optional): Whether to add a prefix to the folder
            names. Defaults to False.

    Returns:
        tuple[str, str]: The path to the tree file (``path_tree`` of the generated scan) and the
            name of the main configuration file (the ``main_configuration`` entry found under
            ``dependencies`` in the scan configuration).
    """
    logging.info(f"Create study from configuration file: {path_config_scan}")

    # All user-facing options are forwarded unchanged to the scan generator
    creation_options = {
        "force_overwrite": force_overwrite,
        "dic_parameter_all_gen": dic_parameter_all_gen,
        "dic_parameter_all_gen_naming": dic_parameter_all_gen_naming,
        "add_prefix_to_folder_names": add_prefix_to_folder_names,
    }
    scan = GenerateScan(path_config=path_config_scan)
    scan.create_study(**creation_options)

    # Both values are what a later call to the submission step needs
    return scan.path_tree, scan.config["dependencies"]["main_configuration"]

create_single_job(name_main_configuration, name_executable_generation_1, name_executable_generation_2=None, name_executable_generation_3=None, name_study='single_job_study', force_overwrite=False)

Create a single job study (not a parametric scan) with the specified configuration and executables. Limited to three generations.

Parameters:

Name Type Description Default
name_main_configuration str

The name of the main configuration file for the study.

required
name_executable_generation_1 str

The name of the executable for the first generation.

required
name_executable_generation_2 Optional[str]

The name of the executable for the second generation. Defaults to None.

None
name_executable_generation_3 Optional[str]

The name of the executable for the third generation. Defaults to None.

None
name_study str

The name of the study. Defaults to "single_job_study".

'single_job_study'
force_overwrite bool

Whether to force overwrite existing files. Defaults to False.

False

Returns:

Name Type Description
str str

The path to the tree file.

Source code in study_da/study_da.py
def create_single_job(
    name_main_configuration: str,
    name_executable_generation_1: str,
    name_executable_generation_2: Optional[str] = None,
    name_executable_generation_3: Optional[str] = None,
    name_study: str = "single_job_study",
    force_overwrite: bool = False,
) -> str:
    """
    Set up a study made of a single job (no parametric scan), with at most three generations.

    The scan description is assembled in memory from the arguments (instead of being read
    from a configuration file) and handed to ``GenerateScan``.

    Args:
        name_main_configuration (str): The name of the main configuration file for the study.
        name_executable_generation_1 (str): The name of the executable for the first generation.
        name_executable_generation_2 (Optional[str], optional): The name of the executable for
            the second generation. The generation is omitted when None. Defaults to None.
        name_executable_generation_3 (Optional[str], optional): The name of the executable for
            the third generation. The generation is omitted when None. Defaults to None.
        name_study (str, optional): The name of the study. Defaults to "single_job_study".
        force_overwrite (bool, optional): Whether to force overwrite existing files.
            Defaults to False.

    Returns:
        str: The path to the tree file.
    """
    # The first generation is always present in the structure
    structure = {"generation_1": {"executable": name_executable_generation_1}}

    # Later generations are only declared when an executable was given for them
    optional_generations = (
        ("generation_2", name_executable_generation_2),
        ("generation_3", name_executable_generation_3),
    )
    for generation, executable in optional_generations:
        if executable is not None:
            structure[generation] = {"executable": executable}

    # Scan dictionary, with the same layout as a scan configuration
    dic_scan = {
        "name": name_study,
        "dependencies": {"main_configuration": name_main_configuration},
        "structure": structure,
    }

    logging.info(f"Create single job study: {name_study}")
    scan = GenerateScan(dic_scan=dic_scan)
    scan.create_study(force_overwrite=force_overwrite)

    return scan.path_tree

submit(path_tree, path_python_environment='', path_python_environment_container='', path_container_image=None, force_configure=False, dic_config_jobs=None, one_generation_at_a_time=False, keep_submit_until_done=False, wait_time=30, max_try=100, force_submit=False, dic_additional_commands_per_gen=None, dic_dependencies_per_gen=None, dic_copy_back_per_gen=None, name_config='config.yaml')

Submits the jobs to the cluster. Note that copying back large files (e.g. json colliders) can trigger a throttling mechanism in AFS.

The following arguments are only used for HTC job submission: dic_additional_commands_per_gen, dic_dependencies_per_gen, dic_copy_back_per_gen and name_config.

Parameters:

Name Type Description Default
path_tree str

The path to the tree file.

required
path_python_environment str

The path to the python environment. Defaults to "".

''
path_python_environment_container str

The path to the python environment in the container. Defaults to "".

''
path_container_image Optional[str]

The path to the container image. Defaults to None.

None
force_configure bool

Whether to force reconfiguration. Defaults to False.

False
dic_config_jobs Optional[dict[str, dict[str, Any]]]

A dictionary containing the configuration of the jobs. Defaults to None.

None
one_generation_at_a_time bool

Whether to submit one full generation at a time. Defaults to False.

False
keep_submit_until_done bool

Whether to keep submitting jobs until all jobs are finished or failed. Defaults to False.

False
max_try int

The maximum number of tries to submit a job. Defaults to 100.

100
force_submit bool

If True, jobs are resubmitted even though they failed. Defaults to False.

False
wait_time float

The wait time between submissions in minutes. Defaults to 30.

30
dic_additional_commands_per_gen dict[int, str]

Additional commands per generation. Defaults to None.

None
dic_dependencies_per_gen dict[int, list[str]]

Dependencies per generation. Only used when doing a HTC submission. Defaults to None.

None
dic_copy_back_per_gen Optional[dict[int, dict[str, bool]]]

A dictionary containing the files to copy back per generation. Accepted keys are "parquet", "yaml", "txt", "json", "zip" and "all". Defaults to None, corresponding to copying back only "light" files, i.e. parquet, yaml and txt.

None
name_config str

The name of the configuration file for the study. Defaults to "config.yaml".

'config.yaml'

Returns:

Type Description
None

None

Source code in study_da/study_da.py
def submit(
    path_tree: str,
    path_python_environment: str = "",
    path_python_environment_container: str = "",
    path_container_image: Optional[str] = None,
    force_configure: bool = False,
    dic_config_jobs: Optional[dict[str, dict[str, Any]]] = None,
    one_generation_at_a_time: bool = False,
    keep_submit_until_done: bool = False,
    wait_time: float = 30,
    max_try: int = 100,
    force_submit: bool = False,
    dic_additional_commands_per_gen: Optional[dict[int, str]] = None,
    dic_dependencies_per_gen: Optional[dict[int, list[str]]] = None,
    dic_copy_back_per_gen: Optional[dict[int, dict[str, bool]]] = None,
    name_config: str = "config.yaml",
) -> None:
    """
    Configure the jobs of an existing study and submit them to the cluster.

    Be aware that copying back large files (e.g. json colliders) can trigger a throttling
    mechanism in AFS.

    Arguments that only matter for HTC job submission:
    - dic_additional_commands_per_gen
    - dic_dependencies_per_gen
    - dic_copy_back_per_gen
    - name_config

    Args:
        path_tree (str): The path to the tree file.
        path_python_environment (str, optional): The path to the python environment.
            Defaults to "".
        path_python_environment_container (str, optional): The path to the python environment
            in the container. Defaults to "".
        path_container_image (Optional[str], optional): The path to the container image.
            Defaults to None.
        force_configure (bool, optional): Whether to force reconfiguration. Defaults to False.
        dic_config_jobs (Optional[dict[str, dict[str, Any]]], optional): A dictionary containing
            the configuration of the jobs. Defaults to None.
        one_generation_at_a_time (bool, optional): Whether to submit one full generation at a
            time. Defaults to False.
        keep_submit_until_done (bool, optional): Whether to keep submitting jobs until all jobs
            are finished or failed. Defaults to False.
        wait_time (float, optional): The wait time between submissions in minutes. Only
            forwarded when ``keep_submit_until_done`` is True. Defaults to 30.
        max_try (int, optional): The maximum number of tries to submit a job. Only forwarded
            when ``keep_submit_until_done`` is True. Defaults to 100.
        force_submit (bool, optional): If True, jobs are resubmitted even though they failed.
            Defaults to False.
        dic_additional_commands_per_gen (Optional[dict[int, str]], optional): Additional
            commands per generation. Defaults to None.
        dic_dependencies_per_gen (Optional[dict[int, list[str]]], optional): Dependencies per
            generation. Only used when doing a HTC submission. Defaults to None.
        dic_copy_back_per_gen (Optional[dict[int, dict[str, bool]]], optional): A dictionary
            containing the files to copy back per generation. Accepted keys are "parquet",
            "yaml", "txt", "json", "zip" and "all". Defaults to None, corresponding to copying
            back only "light" files, i.e. parquet, yaml and txt.
        name_config (str, optional): The name of the configuration file for the study.
            Defaults to "config.yaml".

    Returns:
        None
    """
    # Options understood by both submission modes
    shared_options = {
        "one_generation_at_a_time": one_generation_at_a_time,
        "dic_additional_commands_per_gen": dic_additional_commands_per_gen,
        "dic_dependencies_per_gen": dic_dependencies_per_gen,
        "dic_copy_back_per_gen": dic_copy_back_per_gen,
        "name_config": name_config,
        "force_submit": force_submit,
    }

    # Submission handler for the study described by the tree file
    # (original author's note: this does not affect an already existing study)
    scan_submitter = SubmitScan(
        path_tree=path_tree,
        path_python_environment=path_python_environment,
        path_python_environment_container=path_python_environment_container,
        path_container_image=path_container_image,
    )

    # Job configuration (original author's note: only done if not already configured)
    scan_submitter.configure_jobs(
        dic_config_jobs=dic_config_jobs,
        force_configure=force_configure,
    )

    # One-shot submission (original author's note: jobs already submitted or finished
    # are not submitted again)
    if not keep_submit_until_done:
        scan_submitter.submit(**shared_options)
        return

    # Repeated submission: wait_time and max_try only apply to this mode
    scan_submitter.keep_submit_until_done(
        wait_time=wait_time,
        max_try=max_try,
        **shared_options,
    )