Skip to content

extensions

Extension

Bases: ABC

Defines the interface to connect machine learning libraries to OpenML-Python.

See openml.extension.sklearn.extension for an implementation to bootstrap from.

Source code in openml/extensions/extension_interface.py
class Extension(ABC):
    """Defines the interface to connect machine learning libraries to OpenML-Python.

    Every method declared here is abstract, so a concrete extension has to implement
    all of them before it can be instantiated.

    See ``openml.extension.sklearn.extension`` for an implementation to bootstrap from.
    """

    ################################################################################################
    # General setup

    @classmethod
    @abstractmethod
    def can_handle_flow(cls, flow: OpenMLFlow) -> bool:
        """Check whether a given flow can be handled by this extension.

        This is typically done by parsing the ``external_version`` field.

        Parameters
        ----------
        flow : OpenMLFlow
            The flow to check.

        Returns
        -------
        bool
            True if this extension can handle ``flow``, False otherwise.
        """

    @classmethod
    @abstractmethod
    def can_handle_model(cls, model: Any) -> bool:
        """Check whether a given model can be handled by this extension.

        This is typically done by checking the type of the model, or the package it belongs to.

        Parameters
        ----------
        model : Any
            The model to check.

        Returns
        -------
        bool
            True if this extension can handle ``model``, False otherwise.
        """

    ################################################################################################
    # Abstract methods for flow serialization and de-serialization

    @abstractmethod
    def flow_to_model(
        self,
        flow: OpenMLFlow,
        initialize_with_defaults: bool = False,  # noqa: FBT001, FBT002
        strict_version: bool = True,  # noqa: FBT002, FBT001
    ) -> Any:
        """Instantiate a model from the flow representation.

        Parameters
        ----------
        flow : OpenMLFlow
            The flow to turn into a model.

        initialize_with_defaults : bool, optional (default=False)
            If this flag is set, the hyperparameter values stored in the flow will be
            ignored and a model with its default hyperparameters is returned.

        strict_version : bool, default=True
            Whether to fail if version requirements are not fulfilled.

        Returns
        -------
        Any
            The instantiated model.
        """

    @abstractmethod
    def model_to_flow(self, model: Any) -> OpenMLFlow:
        """Transform a model to a flow for uploading it to OpenML.

        Parameters
        ----------
        model : Any
            The model to transform.

        Returns
        -------
        OpenMLFlow
            The flow representation of ``model``.
        """

    @abstractmethod
    def get_version_information(self) -> list[str]:
        """List versions of libraries required by the flow.

        Returns
        -------
        list[str]
            One string per required library.
        """

    @abstractmethod
    def create_setup_string(self, model: Any) -> str:
        """Create a string which can be used to reinstantiate the given model.

        Parameters
        ----------
        model : Any
            The model to describe.

        Returns
        -------
        str
            The setup string for ``model``.
        """

    ################################################################################################
    # Abstract methods for performing runs with extension modules

    @abstractmethod
    def is_estimator(self, model: Any) -> bool:
        """Check whether the given model is an estimator for the given extension.

        This function is only required for backwards compatibility and will be removed in the
        near future.

        Parameters
        ----------
        model : Any
            The object to check.

        Returns
        -------
        bool
            True if ``model`` is an estimator for this extension, False otherwise.
        """

    @abstractmethod
    def seed_model(self, model: Any, seed: int | None) -> Any:
        """Set the seed of all the unseeded components of a model and return the seeded model.

        Required so that all seed information can be uploaded to OpenML for reproducible results.

        Parameters
        ----------
        model : Any
            The model to be seeded
        seed : int or None
            The seed to apply to the unseeded components.

        Returns
        -------
        Any
            The seeded model.
        """

    @abstractmethod
    def _run_model_on_fold(  # noqa: PLR0913
        self,
        model: Any,
        task: OpenMLTask,
        X_train: np.ndarray | scipy.sparse.spmatrix,
        rep_no: int,
        fold_no: int,
        y_train: np.ndarray | None = None,
        X_test: np.ndarray | scipy.sparse.spmatrix | None = None,
    ) -> tuple[np.ndarray, np.ndarray | None, OrderedDict[str, float], OpenMLRunTrace | None]:
        """Run a model on a single repeat and fold of the task.

        Returns the data that is necessary to construct the OpenML Run object. Is used by
        :func:`openml.runs.run_flow_on_task`.

        Parameters
        ----------
        model : Any
            The UNTRAINED model to run. The model instance will be copied and not altered.
        task : OpenMLTask
            The task to run the model on.
        X_train : array-like
            Training data for the given repetition and fold.
        rep_no : int
            The repeat of the experiment (0-based; in case of 1 time CV, always 0)
        fold_no : int
            The fold nr of the experiment (0-based; in case of holdout, always 0)
        y_train : Optional[np.ndarray] (default=None)
            Target attributes for supervised tasks. In case of classification, these are integer
            indices to the potential classes specified by dataset.
        X_test : Optional, array-like (default=None)
            Test attributes to test for generalization in supervised tasks.

        Returns
        -------
        predictions : np.ndarray
            Model predictions.
        probabilities :  Optional, np.ndarray
            Predicted probabilities (only applicable for supervised classification tasks).
        user_defined_measures : OrderedDict[str, float]
            User defined measures that were generated on this fold
        trace : Optional, OpenMLRunTrace
            Hyperparameter optimization trace (only applicable for supervised tasks with
            hyperparameter optimization).
        """

    @abstractmethod
    def obtain_parameter_values(
        self,
        flow: OpenMLFlow,
        model: Any = None,
    ) -> list[dict[str, Any]]:
        """Extract all parameter settings required for the flow from the model.

        If no explicit model is provided, the parameters will be extracted from `flow.model`
        instead.

        Parameters
        ----------
        flow : OpenMLFlow
            OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server)

        model : Any, optional (default=None)
            The model from which to obtain the parameter values. Must match the flow signature.
            If None, use the model specified in ``OpenMLFlow.model``.

        Returns
        -------
        list[dict[str, Any]]
            A list of dicts, where each dict has the following entries:

            - ``oml:name`` : str: The OpenML parameter name
            - ``oml:value`` : mixed: A representation of the parameter value
            - ``oml:component`` : int: flow id to which the parameter belongs
        """

    @abstractmethod
    def check_if_model_fitted(self, model: Any) -> bool:
        """Return whether the model has already been fitted/trained.

        Parameters
        ----------
        model : Any
            The model to inspect.

        Returns
        -------
        bool
            True if ``model`` has been fitted, False otherwise.
        """

    ################################################################################################
    # Abstract methods for hyperparameter optimization

    @abstractmethod
    def instantiate_model_from_hpo_class(
        self,
        model: Any,
        trace_iteration: OpenMLTraceIteration,
    ) -> Any:
        """Instantiate a base model which can be searched over by the hyperparameter optimization
        model.

        Parameters
        ----------
        model : Any
            A hyperparameter optimization model which defines the model to be instantiated.
        trace_iteration : OpenMLTraceIteration
            Describing the hyperparameter settings to instantiate.

        Returns
        -------
        Any
            The instantiated base model.
        """

can_handle_flow(flow) abstractmethod classmethod

Check whether a given flow can be handled by this extension.

This is typically done by parsing the external_version field.

Parameters:

Name Type Description Default
flow OpenMLFlow
required

Returns:

Type Description
bool
Source code in openml/extensions/extension_interface.py
@classmethod
@abstractmethod
def can_handle_flow(cls, flow: OpenMLFlow) -> bool:
    """Check whether a given flow can be handled by this extension.

    This is typically done by parsing the ``external_version`` field.

    Parameters
    ----------
    flow : OpenMLFlow
        The flow to check.

    Returns
    -------
    bool
        True if this extension can handle ``flow``, False otherwise.
    """

can_handle_model(model) abstractmethod classmethod

Check whether a given model can be handled by this extension.

This is typically done by checking the type of the model, or the package it belongs to.

Parameters:

Name Type Description Default
model Any
required

Returns:

Type Description
bool
Source code in openml/extensions/extension_interface.py
@classmethod
@abstractmethod
def can_handle_model(cls, model: Any) -> bool:
    """Check whether a given model can be handled by this extension.

    This is typically done by checking the type of the model, or the package it belongs to.

    Parameters
    ----------
    model : Any
        The model to check.

    Returns
    -------
    bool
        True if this extension can handle ``model``, False otherwise.
    """

check_if_model_fitted(model) abstractmethod

Returns True/False denoting if the model has already been fitted/trained.

Parameters:

Name Type Description Default
model Any
required

Returns:

Type Description
bool
Source code in openml/extensions/extension_interface.py
@abstractmethod
def check_if_model_fitted(self, model: Any) -> bool:
    """Return whether the model has already been fitted/trained.

    Parameters
    ----------
    model : Any
        The model to inspect.

    Returns
    -------
    bool
        True if ``model`` has been fitted, False otherwise.
    """

create_setup_string(model) abstractmethod

Create a string which can be used to reinstantiate the given model.

Parameters:

Name Type Description Default
model Any
required

Returns:

Type Description
str
Source code in openml/extensions/extension_interface.py
@abstractmethod
def create_setup_string(self, model: Any) -> str:
    """Create a string which can be used to reinstantiate the given model.

    Parameters
    ----------
    model : Any
        The model to describe.

    Returns
    -------
    str
        The setup string for ``model``.
    """

flow_to_model(flow, initialize_with_defaults=False, strict_version=True) abstractmethod

Instantiate a model from the flow representation.

Parameters:

Name Type Description Default
flow OpenMLFlow
required
initialize_with_defaults bool

If this flag is set, the hyperparameter values of flows will be ignored and a flow with its defaults is returned.

False
strict_version bool

Whether to fail if version requirements are not fulfilled.

True

Returns:

Type Description
Any
Source code in openml/extensions/extension_interface.py
@abstractmethod
def flow_to_model(
    self,
    flow: OpenMLFlow,
    initialize_with_defaults: bool = False,  # noqa: FBT001, FBT002
    strict_version: bool = True,  # noqa: FBT002, FBT001
) -> Any:
    """Instantiate a model from the flow representation.

    Parameters
    ----------
    flow : OpenMLFlow
        The flow to turn into a model.

    initialize_with_defaults : bool, optional (default=False)
        If this flag is set, the hyperparameter values stored in the flow will be
        ignored and a model with its default hyperparameters is returned.

    strict_version : bool, default=True
        Whether to fail if version requirements are not fulfilled.

    Returns
    -------
    Any
        The instantiated model.
    """

get_version_information() abstractmethod

List versions of libraries required by the flow.

Returns:

Type Description
list[str]
Source code in openml/extensions/extension_interface.py
@abstractmethod
def get_version_information(self) -> list[str]:
    """List versions of libraries required by the flow.

    Returns
    -------
    list[str]
        One string per required library.
    """

instantiate_model_from_hpo_class(model, trace_iteration) abstractmethod

Instantiate a base model which can be searched over by the hyperparameter optimization model.

Parameters:

Name Type Description Default
model Any

A hyperparameter optimization model which defines the model to be instantiated.

required
trace_iteration OpenMLTraceIteration

Describing the hyperparameter settings to instantiate.

required

Returns:

Type Description
Any
Source code in openml/extensions/extension_interface.py
@abstractmethod
def instantiate_model_from_hpo_class(
    self,
    model: Any,
    trace_iteration: OpenMLTraceIteration,
) -> Any:
    """Instantiate a base model which can be searched over by the hyperparameter optimization
    model.

    Parameters
    ----------
    model : Any
        A hyperparameter optimization model which defines the model to be instantiated.
    trace_iteration : OpenMLTraceIteration
        Describing the hyperparameter settings to instantiate.

    Returns
    -------
    Any
        The instantiated base model.
    """

is_estimator(model) abstractmethod

Check whether the given model is an estimator for the given extension.

This function is only required for backwards compatibility and will be removed in the near future.

Parameters:

Name Type Description Default
model Any
required

Returns:

Type Description
bool
Source code in openml/extensions/extension_interface.py
@abstractmethod
def is_estimator(self, model: Any) -> bool:
    """Check whether the given model is an estimator for the given extension.

    This function is only required for backwards compatibility and will be removed in the
    near future.

    Parameters
    ----------
    model : Any
        The object to check.

    Returns
    -------
    bool
        True if ``model`` is an estimator for this extension, False otherwise.
    """

model_to_flow(model) abstractmethod

Transform a model to a flow for uploading it to OpenML.

Parameters:

Name Type Description Default
model Any
required

Returns:

Type Description
OpenMLFlow
Source code in openml/extensions/extension_interface.py
@abstractmethod
def model_to_flow(self, model: Any) -> OpenMLFlow:
    """Transform a model to a flow for uploading it to OpenML.

    Parameters
    ----------
    model : Any
        The model to transform.

    Returns
    -------
    OpenMLFlow
        The flow representation of ``model``.
    """

obtain_parameter_values(flow, model=None) abstractmethod

Extracts all parameter settings required for the flow from the model.

If no explicit model is provided, the parameters will be extracted from flow.model instead.

Parameters:

Name Type Description Default
flow OpenMLFlow

OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server)

required
model Any

The model from which to obtain the parameter values. Must match the flow signature. If None, use the model specified in OpenMLFlow.model.

None

Returns:

Type Description
list

A list of dicts, where each dict has the following entries:

- oml:name : str: The OpenML parameter name
- oml:value : mixed: A representation of the parameter value
- oml:component : int: flow id to which the parameter belongs

Source code in openml/extensions/extension_interface.py
@abstractmethod
def obtain_parameter_values(
    self,
    flow: OpenMLFlow,
    model: Any = None,
) -> list[dict[str, Any]]:
    """Extract all parameter settings required for the flow from the model.

    If no explicit model is provided, the parameters will be extracted from `flow.model`
    instead.

    Parameters
    ----------
    flow : OpenMLFlow
        OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server)

    model : Any, optional (default=None)
        The model from which to obtain the parameter values. Must match the flow signature.
        If None, use the model specified in ``OpenMLFlow.model``.

    Returns
    -------
    list[dict[str, Any]]
        A list of dicts, where each dict has the following entries:

        - ``oml:name`` : str: The OpenML parameter name
        - ``oml:value`` : mixed: A representation of the parameter value
        - ``oml:component`` : int: flow id to which the parameter belongs
    """

seed_model(model, seed) abstractmethod

Set the seed of all the unseeded components of a model and return the seeded model.

Required so that all seed information can be uploaded to OpenML for reproducible results.

Parameters:

Name Type Description Default
model Any

The model to be seeded

required
seed int | None
required

Returns:

Type Description
model
Source code in openml/extensions/extension_interface.py
@abstractmethod
def seed_model(self, model: Any, seed: int | None) -> Any:
    """Set the seed of all the unseeded components of a model and return the seeded model.

    Required so that all seed information can be uploaded to OpenML for reproducible results.

    Parameters
    ----------
    model : Any
        The model to be seeded
    seed : int or None
        The seed to apply to the unseeded components.

    Returns
    -------
    Any
        The seeded model.
    """

get_extension_by_flow(flow, raise_if_no_extension=False)

Get an extension which can handle the given flow.

Iterates all registered extensions and checks whether they can handle the presented flow. Raises an exception if two extensions can handle a flow.

Parameters:

Name Type Description Default
flow OpenMLFlow
required
raise_if_no_extension bool

Raise an exception if no registered extension can handle the presented flow.

False

Returns:

Type Description
Extension or None
Source code in openml/extensions/functions.py
def get_extension_by_flow(
    flow: OpenMLFlow,
    raise_if_no_extension: bool = False,  # noqa: FBT001, FBT002
) -> Extension | None:
    """Look up the registered extension that is able to handle ``flow``.

    Every extension class in ``openml.extensions.extensions`` is asked via
    ``can_handle_flow`` whether it supports the flow; each class that does is
    instantiated. Exactly one match is expected.

    Parameters
    ----------
    flow : OpenMLFlow
        The flow for which an extension is requested.

    raise_if_no_extension : bool (optional, default=False)
        Raise an exception if no registered extension can handle the presented flow.

    Returns
    -------
    Extension or None
        The single matching extension instance, or None if nothing matched and
        ``raise_if_no_extension`` is False.

    Raises
    ------
    ValueError
        If more than one extension can handle the flow, or if none can and
        ``raise_if_no_extension`` is True.
    """
    matches = [
        registered()
        for registered in openml.extensions.extensions
        if registered.can_handle_flow(flow)
    ]

    # Ambiguity is always an error, regardless of ``raise_if_no_extension``.
    if len(matches) > 1:
        raise ValueError(
            f"Multiple extensions registered which can handle flow: {flow}, but only one "
            f"is allowed ({matches}).",
        )

    if matches:
        return matches[0]

    if raise_if_no_extension:
        raise ValueError(f"No extension registered which can handle flow: {flow}")

    return None

get_extension_by_model(model, raise_if_no_extension=False)

Get an extension which can handle the given model.

Iterates all registered extensions and checks whether they can handle the presented model. Raises an exception if two extensions can handle a model.

Parameters:

Name Type Description Default
model Any
required
raise_if_no_extension bool

Raise an exception if no registered extension can handle the presented model.

False

Returns:

Type Description
Extension or None
Source code in openml/extensions/functions.py
def get_extension_by_model(
    model: Any,
    raise_if_no_extension: bool = False,  # noqa: FBT001, FBT002
) -> Extension | None:
    """Get an extension which can handle the given model.

    Iterates all registered extensions and checks whether they can handle the presented model.
    Raises an exception if two extensions can handle a model.

    Parameters
    ----------
    model : Any
        The model for which an extension is requested.

    raise_if_no_extension : bool (optional, default=False)
        Raise an exception if no registered extension can handle the presented model.

    Returns
    -------
    Extension or None
        The single matching extension instance, or None if nothing matched and
        ``raise_if_no_extension`` is False.

    Raises
    ------
    ValueError
        If more than one extension can handle the model, or if none can and
        ``raise_if_no_extension`` is True.
    """
    # Instantiate every registered extension class that claims the model.
    candidates = [
        extension_class()
        for extension_class in openml.extensions.extensions
        if extension_class.can_handle_model(model)
    ]

    if not candidates:
        if raise_if_no_extension:
            raise ValueError(f"No extension registered which can handle model: {model}")

        return None

    if len(candidates) == 1:
        return candidates[0]

    # More than one match is ambiguous and never silently resolved.
    raise ValueError(
        f"Multiple extensions registered which can handle model: {model}, but only one "
        f"is allowed ({candidates}).",
    )

register_extension(extension)

Register an extension.

Registered extensions are considered by get_extension_by_flow and get_extension_by_model, which are used by openml.flow and openml.runs.

Parameters:

Name Type Description Default
extension Type[Extension]
required

Returns:

Type Description
None
Source code in openml/extensions/functions.py
def register_extension(extension: type[Extension]) -> None:
    """Add an extension class to the list of registered extensions.

    The class is appended to ``openml.extensions.extensions``, the list that
    ``get_extension_by_flow`` and ``get_extension_by_model`` iterate over; those
    lookups are used by ``openml.flow`` and ``openml.runs``.

    Parameters
    ----------
    extension : Type[Extension]
        The extension class (not an instance) to register.

    Returns
    -------
    None
    """
    registry = openml.extensions.extensions
    registry.append(extension)