functions

`__list_setups(api_call, output_format='object')` ¶

Helper function to parse API calls which are lists of setups

Source code in openml/setups/functions.py

def __list_setups(
    api_call: str, output_format: Literal["dict", "dataframe", "object"] = "object"
) -> dict[int, dict] | pd.DataFrame | dict[int, OpenMLSetup]:
    """Perform a setup-listing API call and parse the returned XML.

    Parameters
    ----------
    api_call : str
        The API call to perform; it is issued as a "get" request.
    output_format : str, optional (default='object')
        Forwarded to ``_create_setup_from_xml`` for every listed setup.
        - If 'object' the parsed setups are read through their ``setup_id``
          attribute.
        - If 'dict' the parsed setups are read through their ``"setup_id"`` key.
        - If 'dataframe' the parsed setups are collected like 'dict' and then
          converted into a pandas DataFrame indexed by setup id.

    Returns
    -------
    dict or pandas.DataFrame
        Mapping of setup id to the parsed setup, or the DataFrame built from
        that mapping when ``output_format`` is 'dataframe'.

    Raises
    ------
    ValueError
        If the response has no "oml:setups" element, if that element has no
        "@xmlns:oml" attribute, or if the attribute is not the OpenML
        namespace URI.
    """
    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    # force_list makes "oml:setup" a list even if the response holds a single setup
    setups_dict = xmltodict.parse(xml_string, force_list=("oml:setup",))
    openml_uri = "http://openml.org/openml"

    # Minimalistic check if the XML is useful
    if "oml:setups" not in setups_dict:
        raise ValueError(
            f'Error in return XML, does not contain "oml:setups": {setups_dict!s}',
        )

    if "@xmlns:oml" not in setups_dict["oml:setups"]:
        raise ValueError(
            f'Error in return XML, does not contain "oml:setups"/@xmlns:oml: {setups_dict!s}',
        )

    if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri:
        raise ValueError(
            "Error in return XML, value of  "
            '"oml:setups"/@xmlns:oml is not '
            f'"{openml_uri}": {setups_dict!s}',
        )

    setup_entries = setups_dict["oml:setups"]["oml:setup"]
    # Report the type of the value that was actually checked
    assert isinstance(setup_entries, list), type(setup_entries)

    setups = {}
    for setup_ in setup_entries:
        # making it a dict to give it the right format
        current = _create_setup_from_xml(
            {"oml:setup_parameters": setup_},
            output_format=output_format,
        )
        if output_format == "object":
            setups[current.setup_id] = current  # type: ignore
        else:
            setups[current["setup_id"]] = current  # type: ignore

    if output_format == "dataframe":
        return pd.DataFrame.from_dict(setups, orient="index")

    return setups

`get_setup(setup_id)` ¶

Downloads the setup (configuration) description from OpenML and returns a structured object

Parameters:

Name	Type	Description	Default
`setup_id`	`int`	The Openml setup_id	required

Returns:

Type	Description
`OpenMLSetup (an initialized openml setup object)`

Source code in openml/setups/functions.py

def get_setup(setup_id: int) -> OpenMLSetup:
    """
    Fetch the description of a setup (configuration), using the local
    cache when possible and the OpenML server otherwise.

    Parameters
    ----------
    setup_id : int
        The OpenML setup_id

    Returns
    -------
    OpenMLSetup (an initialized openml setup object)
    """
    cache_root = Path(config.get_cache_directory())
    setup_cache_dir = cache_root / "setups" / str(setup_id)
    # The cache directory is created up front, before the cache lookup
    setup_cache_dir.mkdir(parents=True, exist_ok=True)
    description_path = setup_cache_dir / "description.xml"

    try:
        return _get_cached_setup(setup_id)
    except openml.exceptions.OpenMLCacheException:
        # Cache miss: download the description and store it for later calls
        setup_xml = openml._api_calls._perform_api_call("/setup/%d" % setup_id, "get")
        with description_path.open("w", encoding="utf8") as description_file:
            description_file.write(setup_xml)

    parsed_description = xmltodict.parse(setup_xml)
    return _create_setup_from_xml(parsed_description, output_format="object")  # type: ignore

`initialize_model(setup_id)` ¶

Initialize a model based on a setup_id (i.e., using the exact same parameter settings)

Parameters:

Name	Type	Description	Default
`setup_id`	`int`	The Openml setup_id	required

Returns:

Type	Description
`model`

Source code in openml/setups/functions.py

def initialize_model(setup_id: int) -> Any:
    """
    Initialize a model based on a setup_id (i.e., using the exact
    same parameter settings)

    Parameters
    ----------
    setup_id : int
        The OpenML setup_id

    Returns
    -------
    model
        The result of ``flow.extension.flow_to_model`` for the setup's flow,
        after the setup's hyperparameter values have been written into the
        flow's (sub)flow parameters.
    """
    setup = get_setup(setup_id)
    flow = openml.flows.get_flow(setup.flow_id)

    # instead of using scikit-learns or any other library's "set_params" function, we override the
    # OpenMLFlow objects default parameter value so we can utilize the
    # Extension.flow_to_model() function to reinitialize the flow with the set defaults.
    if setup.parameters:
        # The structure lookup is the same for every hyperparameter, so compute it once
        # instead of once per loop iteration.
        # NOTE(review): assumes get_structure() is unaffected by the parameter
        # overrides written below - confirm against OpenMLFlow.
        structure = flow.get_structure("flow_id")
        for hyperparameter in setup.parameters.values():
            subflow_path = structure[hyperparameter.flow_id]
            # An empty path means the hyperparameter belongs to the top-level flow
            subflow = flow.get_subflow(subflow_path) if subflow_path else flow
            subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value

    return flow.extension.flow_to_model(flow)

`list_setups(offset=None, size=None, flow=None, tag=None, setup=None, output_format='object')` ¶

List all setups matching all of the given filters.

Parameters:

Name	Type	Description	Default
`offset`	`int`		`None`
`size`	`int`		`None`
`flow`	`int`		`None`
`tag`	`str`		`None`
`setup`	`Iterable[int]`		`None`
`output_format`	`Literal['object', 'dict', 'dataframe']`	The parameter decides the format of the output. - If 'object' the output is a dict of OpenMLSetup objects - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame	`'object'`

Returns:

Type	Description
`dict or dataframe`

Source code in openml/setups/functions.py

def list_setups(  # noqa: PLR0913
    offset: int | None = None,
    size: int | None = None,
    flow: int | None = None,
    tag: str | None = None,
    setup: Iterable[int] | None = None,
    output_format: Literal["object", "dict", "dataframe"] = "object",
) -> dict | pd.DataFrame:
    """
    List all setups matching all of the given filters.

    Parameters
    ----------
    offset : int, optional
    size : int, optional
    flow : int, optional
    tag : str, optional
    setup : Iterable[int], optional
    output_format: str, optional (default='object')
        The parameter decides the format of the output.
        - If 'dict' the output is a dict of dict
        - If 'dataframe' the output is a pandas DataFrame

    Returns
    -------
    dict or dataframe
    """
    if output_format not in ["dataframe", "dict", "object"]:
        raise ValueError(
            "Invalid output format selected. " "Only 'dict', 'object', or 'dataframe' applicable.",
        )

    # TODO: [0.15]
    if output_format == "dict":
        msg = (
            "Support for `output_format` of 'dict' will be removed in 0.15. "
            "To ensure your code will continue to work, "
            "use `output_format`='dataframe' or `output_format`='object'."
        )
        warnings.warn(msg, category=FutureWarning, stacklevel=2)

    batch_size = 1000  # batch size for setups is lower
    return openml.utils._list_all(  # type: ignore
        list_output_format=output_format,  # type: ignore
        listing_call=_list_setups,
        offset=offset,
        size=size,
        flow=flow,
        tag=tag,
        setup=setup,
        batch_size=batch_size,
    )

`setup_exists(flow)` ¶

Checks whether a hyperparameter configuration already exists on the server.

Parameters:

Name	Type	Description	Default
`flow`	`OpenMLFlow`	The openml flow object. Should have flow id present for the main flow and all subflows (i.e., it should be downloaded from the server by means of flow.get, and not instantiated locally)	required

Returns:

Name	Type	Description
`setup_id`	`int`	The setup id if the setup exists on the server, False otherwise

Source code in openml/setups/functions.py

def setup_exists(flow: OpenMLFlow) -> int:
    """
    Checks whether a hyperparameter configuration already exists on the server.

    Parameters
    ----------
    flow : OpenMLFlow
        The openml flow object. Should have flow id present for the main flow
        and all subflows (i.e., it should be downloaded from the server by
        means of flow.get, and not instantiated locally)

    Returns
    -------
    setup_id : int
        The setup id if the setup exists on the server (the server reported an
        id greater than 0), False otherwise.

    Raises
    ------
    ValueError
        If the flow has no model, has no extension, or if the id reported by
        ``flow_exists`` differs from ``flow.flow_id``.
    """
    # sadly, this api call relies on a run object
    openml.flows.functions._check_flow_for_server_id(flow)
    if flow.model is None:
        raise ValueError("Flow should have model field set with the actual model.")
    if flow.extension is None:
        raise ValueError("Flow should have extension field set with the correct extension.")

    # checks whether the flow exists on the server and flow ids align
    exists = flow_exists(flow.name, flow.external_version)
    if exists != flow.flow_id:
        # Report the same attribute that was compared above
        raise ValueError(
            f"Local flow id ({flow.flow_id}) differs from server id ({exists}). "
            "If this issue persists, please contact the developers.",
        )

    openml_param_settings = flow.extension.obtain_parameter_values(flow)
    description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True)
    file_elements = {
        "description": ("description.arff", description),
    }  # type: openml._api_calls.FILE_ELEMENTS_TYPE
    result = openml._api_calls._perform_api_call(
        "/setup/exists/",
        "post",
        file_elements=file_elements,
    )
    result_dict = xmltodict.parse(result)
    setup_id = int(result_dict["oml:setup_exists"]["oml:id"])
    # Only ids greater than 0 are treated as an existing setup
    return setup_id if setup_id > 0 else False

functions

__list_setups(api_call, output_format='object') ¶

get_setup(setup_id) ¶

initialize_model(setup_id) ¶

list_setups(offset=None, size=None, flow=None, tag=None, setup=None, output_format='object') ¶

setup_exists(flow) ¶

`__list_setups(api_call, output_format='object')` ¶

`get_setup(setup_id)` ¶

`initialize_model(setup_id)` ¶

`list_setups(offset=None, size=None, flow=None, tag=None, setup=None, output_format='object')` ¶

`setup_exists(flow)` ¶