Skip to content

setups

OpenMLParameter

Parameter object (used in setup).

Parameters:

Name Type Description Default
input_id int

The input id from the openml database

required
flow_id int

The flow to which this parameter is associated

required
flow_name str

The name of the flow (no version number) to which this parameter is associated

required
full_name str

The name of the flow and parameter combined

required
parameter_name str

The name of the parameter

required
data_type str

The datatype of the parameter. Generally unused for sklearn flows.

required
default_value str

The default value. For sklearn parameters, this is unknown and a default value is selected arbitrarily

required
value str

If the parameter was set, the value that it was set to.

required
Source code in openml/setups/setup.py
class OpenMLParameter:
    """Parameter object (used in setup).

    Parameters
    ----------
    input_id : int
        The input id from the openml database. Stored on the instance as
        the ``id`` attribute.
    flow_id : int
        The flow to which this parameter is associated
    flow_name : str
        The name of the flow (no version number) to which this parameter
        is associated
    full_name : str
        The name of the flow and parameter combined
    parameter_name : str
        The name of the parameter
    data_type : str
        The datatype of the parameter. Generally unused for sklearn flows
    default_value : str
        The default value. For sklearn parameters, this is unknown and a
        default value is selected arbitrarily
    value : str
        If the parameter was set, the value that it was set to.
    """

    def __init__(  # noqa: PLR0913
        self,
        input_id: int,
        flow_id: int,
        flow_name: str,
        full_name: str,
        parameter_name: str,
        data_type: str,
        default_value: str,
        value: str,
    ) -> None:
        # Note that ``input_id`` is exposed as ``id``; all other arguments keep
        # their name as attribute.
        self.id = input_id
        self.flow_id = flow_id
        self.flow_name = flow_name
        self.full_name = full_name
        self.parameter_name = parameter_name
        self.data_type = data_type
        self.default_value = default_value
        self.value = value

    def __repr__(self) -> str:
        """Return a multi-line, human-readable summary of the parameter.

        The summary consists of an underlined title followed by one
        ``name: value`` line per field, where names are right-padded with
        dots so that all values line up.
        """
        title = "OpenML Parameter"
        header = f"{title}\n{'=' * len(title)}\n"

        # The value-related attributes are printed as an indented sub-list:
        # indention = 2 spaces + 1 | + 2 underscores
        indent = "{}|{}".format(" " * 2, "_" * 2)

        # The list order is the order in which the information is printed.
        fields = [
            ("ID", self.id),
            ("Flow ID", self.flow_id),
            # NOTE: the "Flow Name" line shows ``full_name`` (flow and
            # parameter name combined) rather than ``flow_name``.
            ("Flow Name", self.full_name),
            ("Flow URL", openml.flows.OpenMLFlow.url_for_id(self.flow_id)),
            ("Parameter Name", self.parameter_name),
            (f"{indent}Data Type", self.data_type),
            (f"{indent}Default", self.default_value),
            (f"{indent}Value", self.value),
        ]

        # Pad every field name with dots up to the width of the longest one.
        longest_field_name_length = max(len(name) for name, _ in fields)
        body = "\n".join(
            f"{name:.<{longest_field_name_length}}: {value}" for name, value in fields
        )
        return header + body

OpenMLSetup

Setup object (a.k.a. Configuration).

Parameters:

Name Type Description Default
setup_id int

The OpenML setup id

required
flow_id int

The flow that it is built upon

required
parameters dict

The setting of the parameters

required
Source code in openml/setups/setup.py
class OpenMLSetup:
    """Setup object (a.k.a. Configuration).

    Parameters
    ----------
    setup_id : int
        The OpenML setup id
    flow_id : int
        The flow that this setup is built upon
    parameters : dict
        The setting of the parameters. ``None`` is accepted as well.

    Raises
    ------
    ValueError
        If ``setup_id`` or ``flow_id`` is not an int, or if ``parameters``
        is neither ``None`` nor a dict.
    """

    def __init__(self, setup_id: int, flow_id: int, parameters: dict[int, Any] | None):
        # Both identifiers are validated the same way, setup id first.
        id_checks = (
            (setup_id, "setup id should be int"),
            (flow_id, "flow id should be int"),
        )
        for candidate, complaint in id_checks:
            if not isinstance(candidate, int):
                raise ValueError(complaint)

        if not (parameters is None or isinstance(parameters, dict)):
            raise ValueError("parameters should be dict")

        self.setup_id = setup_id
        self.flow_id = flow_id
        self.parameters = parameters

    def __repr__(self) -> str:
        """Return an underlined title followed by dot-padded ``name: value`` lines."""
        title = "OpenML Setup"

        # Rows are listed in the order in which they are printed.
        rows = [
            ("Setup ID", self.setup_id),
            ("Flow ID", self.flow_id),
            ("Flow URL", openml.flows.OpenMLFlow.url_for_id(self.flow_id)),
            (
                "# of Parameters",
                # Without a parameter dict there is nothing to count.
                float("nan") if self.parameters is None else len(self.parameters),
            ),
        ]

        width = max(len(label) for label, _ in rows)
        lines = [f"{label:.<{width}}: {shown}" for label, shown in rows]
        return f"{title}\n{'=' * len(title)}\n" + "\n".join(lines)

get_setup(setup_id)

Downloads the setup (configuration) description from OpenML and returns a structured object

Parameters:

Name Type Description Default
setup_id int

The Openml setup_id

required

Returns:

Type Description
OpenMLSetup (an initialized openml setup object)
Source code in openml/setups/functions.py
def get_setup(setup_id: int) -> OpenMLSetup:
    """
    Fetch the description of a setup (configuration) and return it as a
    structured object.

    The cached copy is returned when ``_get_cached_setup`` can provide it.
    Otherwise the description is requested from the server, written to
    ``description.xml`` inside the setup's cache directory, and parsed.

    Parameters
    ----------
    setup_id : int
        The Openml setup_id

    Returns
    -------
    OpenMLSetup (an initialized openml setup object)
    """
    # The per-setup cache directory is created up front, even on a cache hit.
    cache_root = Path(config.get_cache_directory())
    setup_dir = cache_root / "setups" / str(setup_id)
    setup_dir.mkdir(exist_ok=True, parents=True)
    description_path = setup_dir / "description.xml"

    try:
        return _get_cached_setup(setup_id)
    except openml.exceptions.OpenMLCacheException:
        # Cache miss: download the XML description and store it for next time.
        setup_xml = openml._api_calls._perform_api_call("/setup/%d" % setup_id, "get")
        with description_path.open("w", encoding="utf8") as handle:
            handle.write(setup_xml)

    parsed_description = xmltodict.parse(setup_xml)
    return _create_setup_from_xml(parsed_description, output_format="object")  # type: ignore

initialize_model(setup_id)

Initializes a model based on a setup_id (i.e., using the exact same parameter settings)

Parameters:

Name Type Description Default
setup_id int

The Openml setup_id

required

Returns:

Type Description
model
Source code in openml/setups/functions.py
def initialize_model(setup_id: int) -> Any:
    """
    Initialize a model based on a setup_id (i.e., using the exact
    same parameter settings).

    Parameters
    ----------
    setup_id : int
        The Openml setup_id

    Returns
    -------
    model
        The result of ``flow.extension.flow_to_model`` for the setup's flow,
        after the setup's parameter values have been written into the flow.
    """
    setup = get_setup(setup_id)
    flow = openml.flows.get_flow(setup.flow_id)

    # instead of using scikit-learns or any other library's "set_params" function, we override the
    # OpenMLFlow objects default parameter value so we can utilize the
    # Extension.flow_to_model() function to reinitialize the flow with the set defaults.
    if setup.parameters:
        # The structure lookup does not depend on the hyperparameter being
        # processed, so it is computed once instead of once per iteration.
        structure = flow.get_structure("flow_id")
        for hyperparameter in setup.parameters.values():
            path_to_subflow = structure[hyperparameter.flow_id]
            # A non-empty entry is resolved through get_subflow; an empty one
            # means the parameter is set on the flow itself.
            if len(path_to_subflow) > 0:
                subflow = flow.get_subflow(path_to_subflow)
            else:
                subflow = flow
            subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value

    return flow.extension.flow_to_model(flow)

list_setups(offset=None, size=None, flow=None, tag=None, setup=None, output_format='object')

List all setups matching all of the given filters.

Parameters:

Name Type Description Default
offset int
None
size int
None
flow int
None
tag str
None
setup Iterable[int]
None
output_format Literal['object', 'dict', 'dataframe']

The parameter decides the format of the output. If 'dict', the output is a dict of dicts; if 'dataframe', the output is a pandas DataFrame.

'object'

Returns:

Type Description
dict or dataframe
Source code in openml/setups/functions.py
def list_setups(  # noqa: PLR0913
    offset: int | None = None,
    size: int | None = None,
    flow: int | None = None,
    tag: str | None = None,
    setup: Iterable[int] | None = None,
    output_format: Literal["object", "dict", "dataframe"] = "object",
) -> dict | pd.DataFrame:
    """
    Return every setup that satisfies all of the supplied filters.

    ``offset``, ``size``, ``flow``, ``tag`` and ``setup`` are forwarded
    unchanged to the underlying listing call.

    Parameters
    ----------
    offset : int, optional
    size : int, optional
    flow : int, optional
    tag : str, optional
    setup : Iterable[int], optional
    output_format: str, optional (default='object')
        Selects the format of the output; one of 'object', 'dict' or
        'dataframe'.
        - If 'dict' the output is a dict of dict
        - If 'dataframe' the output is a pandas DataFrame

    Returns
    -------
    dict or dataframe

    Raises
    ------
    ValueError
        If ``output_format`` is not one of the supported values.
    """
    supported_formats = ("dataframe", "dict", "object")
    if output_format not in supported_formats:
        raise ValueError(
            "Invalid output format selected. Only 'dict', 'object', or 'dataframe' applicable.",
        )

    # TODO: [0.15]
    if output_format == "dict":
        deprecation_notice = (
            "Support for `output_format` of 'dict' will be removed in 0.15. "
            "To ensure your code will continue to work, "
            "use `output_format`='dataframe' or `output_format`='object'."
        )
        # stacklevel=2 attributes the warning to the caller of list_setups.
        warnings.warn(deprecation_notice, category=FutureWarning, stacklevel=2)

    return openml.utils._list_all(  # type: ignore
        list_output_format=output_format,  # type: ignore
        listing_call=_list_setups,
        offset=offset,
        size=size,
        flow=flow,
        tag=tag,
        setup=setup,
        batch_size=1000,  # batch size for setups is lower
    )

setup_exists(flow)

Checks whether a hyperparameter configuration already exists on the server.

Parameters:

Name Type Description Default
flow OpenMLFlow

The openml flow object. Should have flow id present for the main flow and all subflows (i.e., it should be downloaded from the server by means of flow.get, and not instantiated locally)

required

Returns:

Name Type Description
setup_id int

The setup id if the setup exists, False otherwise

Source code in openml/setups/functions.py
def setup_exists(flow: OpenMLFlow) -> int:
    """
    Check whether the hyperparameter configuration of a flow is already
    known to the server.

    Parameters
    ----------
    flow : OpenMLFlow
        The openml flow object. Should have flow id present for the main flow
        and all subflows (i.e., it should be downloaded from the server by
        means of flow.get, and not instantiated locally)

    Returns
    -------
    setup_id : int
        setup id iff exists, False otherwise

    Raises
    ------
    ValueError
        If ``flow.model`` or ``flow.extension`` is ``None``, or if the id
        reported by ``flow_exists`` differs from ``flow.flow_id``.
    """
    # sadly, this api call relies on a run object
    openml.flows.functions._check_flow_for_server_id(flow)
    if flow.model is None:
        raise ValueError("Flow should have model field set with the actual model.")
    if flow.extension is None:
        raise ValueError("Flow should have model field set with the correct extension.")

    # the flow has to exist on the server under the very same id
    server_flow_id = flow_exists(flow.name, flow.external_version)
    if server_flow_id != flow.flow_id:
        raise ValueError(
            f"Local flow id ({flow.id}) differs from server id ({server_flow_id}). "
            "If this issue persists, please contact the developers.",
        )

    # serialise the flow's parameter values into the XML payload of the request
    param_settings = flow.extension.obtain_parameter_values(flow)
    payload = _to_dict(flow.flow_id, param_settings)
    description = xmltodict.unparse(payload, pretty=True)
    file_elements: openml._api_calls.FILE_ELEMENTS_TYPE = {
        "description": ("description.arff", description),
    }

    response_xml = openml._api_calls._perform_api_call(
        "/setup/exists/",
        "post",
        file_elements=file_elements,
    )
    response = xmltodict.parse(response_xml)
    setup_id = int(response["oml:setup_exists"]["oml:id"])

    # only a positive id is treated as an existing setup
    if setup_id > 0:
        return setup_id
    return False