tasks

`OpenMLClassificationTask` ¶

Bases: OpenMLSupervisedTask

OpenML Classification object.

Parameters:

Name	Type	Description	Default
`task_type_id`	`TaskType`	ID of the Classification task type.	required
`task_type`	`str`	Name of the Classification task type.	required
`data_set_id`	`int`	ID of the OpenML dataset associated with the Classification task.	required
`target_name`	`str`	Name of the target variable.	required
`estimation_procedure_id`	`int`	ID of the estimation procedure for the Classification task.	`1`
`estimation_procedure_type`	`str`	Type of the estimation procedure.	`None`
`estimation_parameters`	`dict`	Estimation parameters for the Classification task.	`None`
`evaluation_measure`	`str`	Name of the evaluation measure.	`None`
`data_splits_url`	`str`	URL of the data splits for the Classification task.	`None`
`task_id`	`Union[int, None]`	ID of the Classification task (if it already exists on OpenML).	`None`
`class_labels`	`List of str`	A list of class labels (for classification tasks).	`None`
`cost_matrix`	`array`	A cost matrix (for classification tasks).	`None`

Source code in openml/tasks/task.py

class OpenMLClassificationTask(OpenMLSupervisedTask):
    """Supervised classification task on OpenML.

    Parameters
    ----------
    task_type_id : TaskType
        ID of the Classification task type.
    task_type : str
        Name of the Classification task type.
    data_set_id : int
        ID of the OpenML dataset associated with the Classification task.
    target_name : str
        Name of the target variable.
    estimation_procedure_id : int, default=1
        ID of the estimation procedure for the Classification task.
    estimation_procedure_type : str, default=None
        Type of the estimation procedure.
    estimation_parameters : dict, default=None
        Estimation parameters for the Classification task.
    evaluation_measure : str, default=None
        Name of the evaluation measure.
    data_splits_url : str, default=None
        URL of the data splits for the Classification task.
    task_id : Union[int, None], default=None
        ID of the Classification task (if it already exists on OpenML).
    class_labels : List of str, default=None
        A list of class labels (for classification tasks).
    cost_matrix : array, default=None
        A cost matrix (for classification tasks). Only ``None`` is accepted.

    Raises
    ------
    NotImplementedError
        If ``cost_matrix`` is not ``None``.
    """

    def __init__(  # noqa: PLR0913
        self,
        task_type_id: TaskType,
        task_type: str,
        data_set_id: int,
        target_name: str,
        estimation_procedure_id: int = 1,
        estimation_procedure_type: str | None = None,
        estimation_parameters: dict[str, str] | None = None,
        evaluation_measure: str | None = None,
        data_splits_url: str | None = None,
        task_id: int | None = None,
        class_labels: list[str] | None = None,
        cost_matrix: np.ndarray | None = None,
    ):
        # Everything except the label / cost information is forwarded to the
        # supervised-task base class.
        super().__init__(
            task_type_id=task_type_id,
            task_type=task_type,
            data_set_id=data_set_id,
            target_name=target_name,
            estimation_procedure_id=estimation_procedure_id,
            estimation_procedure_type=estimation_procedure_type,
            estimation_parameters=estimation_parameters,
            evaluation_measure=evaluation_measure,
            data_splits_url=data_splits_url,
            task_id=task_id,
        )
        self.class_labels, self.cost_matrix = class_labels, cost_matrix

        # Cost matrices are stored on the instance but not supported yet.
        cost_matrix_given = cost_matrix is not None
        if cost_matrix_given:
            raise NotImplementedError("Costmatrix")

`OpenMLClusteringTask` ¶

Bases: OpenMLTask

OpenML Clustering object.

Parameters:

Name	Type	Description	Default
`task_type_id`	`TaskType`	Task type ID of the OpenML clustering task.	required
`task_type`	`str`	Task type of the OpenML clustering task.	required
`data_set_id`	`int`	ID of the OpenML dataset used in clustering the task.	required
`estimation_procedure_id`	`int`	ID of the OpenML estimation procedure.	`17`
`task_id`	`Union[int, None]`	ID of the OpenML clustering task.	`None`
`estimation_procedure_type`	`str`	Type of the OpenML estimation procedure used in the clustering task.	`None`
`estimation_parameters`	`dict`	Parameters used by the OpenML estimation procedure.	`None`
`data_splits_url`	`str`	URL of the OpenML data splits for the clustering task.	`None`
`evaluation_measure`	`str`	Evaluation measure used in the clustering task.	`None`
`target_name`	`str`	Name of the target feature (class) that is not part of the feature set for the clustering task.	`None`

Source code in openml/tasks/task.py

class OpenMLClusteringTask(OpenMLTask):
    """OpenML Clustering object.

    Parameters
    ----------
    task_type_id : TaskType
        Task type ID of the OpenML clustering task.
    task_type : str
        Task type of the OpenML clustering task.
    data_set_id : int
        ID of the OpenML dataset used in clustering the task.
    estimation_procedure_id : int, default=17
        ID of the OpenML estimation procedure.
    task_id : Union[int, None], default=None
        ID of the OpenML clustering task.
    estimation_procedure_type : str, default=None
        Type of the OpenML estimation procedure used in the clustering task.
    estimation_parameters : dict, default=None
        Parameters used by the OpenML estimation procedure.
    data_splits_url : str, default=None
        URL of the OpenML data splits for the clustering task.
    evaluation_measure : str, default=None
        Evaluation measure used in the clustering task.
    target_name : str, default=None
        Name of the target feature (class) that is not part of the
        feature set for the clustering task.
    """

    def __init__(  # noqa: PLR0913
        self,
        task_type_id: TaskType,
        task_type: str,
        data_set_id: int,
        estimation_procedure_id: int = 17,
        task_id: int | None = None,
        estimation_procedure_type: str | None = None,
        estimation_parameters: dict[str, str] | None = None,
        data_splits_url: str | None = None,
        evaluation_measure: str | None = None,
        target_name: str | None = None,
    ):
        super().__init__(
            task_id=task_id,
            task_type_id=task_type_id,
            task_type=task_type,
            data_set_id=data_set_id,
            evaluation_measure=evaluation_measure,
            estimation_procedure_id=estimation_procedure_id,
            estimation_procedure_type=estimation_procedure_type,
            estimation_parameters=estimation_parameters,
            data_splits_url=data_splits_url,
        )

        # Stored on the instance only; it is not forwarded to the parent class.
        self.target_name = target_name

    @overload
    def get_X(
        self,
        dataset_format: Literal["array"] = "array",
    ) -> np.ndarray | scipy.sparse.spmatrix:
        ...

    @overload
    def get_X(self, dataset_format: Literal["dataframe"]) -> pd.DataFrame:
        ...

    def get_X(
        self,
        dataset_format: Literal["array", "dataframe"] = "array",
    ) -> np.ndarray | pd.DataFrame | scipy.sparse.spmatrix:
        """Get data associated with the current task.

        Parameters
        ----------
        dataset_format : str
            Data structure of the returned data. See :meth:`openml.datasets.OpenMLDataset.get_data`
            for possible options.

        Returns
        -------
        X
            The first element returned by the dataset's ``get_data`` when called
            with ``target=None``; no separate target is returned.
        """
        dataset = self.get_dataset()
        # Only the data itself is of interest, the remaining return values are dropped.
        data, *_ = dataset.get_data(dataset_format=dataset_format, target=None)
        return data

    def _to_dict(self) -> dict[str, dict[str, int | str | list[dict[str, Any]]]]:
        """Return the dictionary representation produced by the parent class, unchanged."""
        # Adding the target feature is not supported as a feature right now.
        # Re-enable the snippet below if it becomes supported on the server
        # in the future.
        # https://github.com/openml/OpenML/issues/925
        #
        # task_dict = task_container['oml:task_inputs']
        # if self.target_name is not None:
        #     task_dict['oml:input'].append(
        #         OrderedDict([
        #             ('@name', 'target_feature'),
        #             ('#text', self.target_name)
        #         ])
        #     )
        return super()._to_dict()

`get_X(dataset_format='array')` ¶

get_X(dataset_format: Literal['array'] = 'array') -> np.ndarray | scipy.sparse.spmatrix

get_X(dataset_format: Literal['dataframe']) -> pd.DataFrame

Get data associated with the current task.

Parameters:

Name	Type	Description	Default
`dataset_format`	`str`	Data structure of the returned data. See :meth:`openml.datasets.OpenMLDataset.get_data` for possible options.	`'array'`

Returns:

Type	Description
`ndarray | DataFrame | spmatrix`	The data (X) of the task's dataset; no separate target is returned.

Source code in openml/tasks/task.py

def get_X(
    self,
    dataset_format: Literal["array", "dataframe"] = "array",
) -> np.ndarray | pd.DataFrame | scipy.sparse.spmatrix:
    """Fetch the data of the dataset this task refers to.

    Parameters
    ----------
    dataset_format : str
        Data structure of the returned data. See :meth:`openml.datasets.OpenMLDataset.get_data`
        for possible options.

    Returns
    -------
    X
        The first element returned by the dataset's ``get_data`` when called
        with ``target=None``; no separate target is returned.
    """
    # Ask the dataset for its data without a target; everything after the
    # first returned element is discarded.
    features, *_unused = self.get_dataset().get_data(
        target=None,
        dataset_format=dataset_format,
    )
    return features

`OpenMLLearningCurveTask` ¶

Bases: OpenMLClassificationTask

OpenML Learning Curve object.

Parameters:

Name	Type	Description	Default
`task_type_id`	`TaskType`	ID of the Learning Curve task.	required
`task_type`	`str`	Name of the Learning Curve task.	required
`data_set_id`	`int`	ID of the dataset that this task is associated with.	required
`target_name`	`str`	Name of the target feature in the dataset.	required
`estimation_procedure_id`	`int`	ID of the estimation procedure to use for evaluating models.	`13`
`estimation_procedure_type`	`str`	Type of the estimation procedure.	`None`
`estimation_parameters`	`dict`	Additional parameters for the estimation procedure.	`None`
`data_splits_url`	`str`	URL of the file containing the data splits for Learning Curve task.	`None`
`task_id`	`Union[int, None]`	ID of the Learning Curve task.	`None`
`evaluation_measure`	`str`	Name of the evaluation measure to use for evaluating models.	`None`
`class_labels`	`list of str`	Class labels for Learning Curve tasks.	`None`
`cost_matrix`	`numpy array`	Cost matrix for Learning Curve tasks.	`None`

Source code in openml/tasks/task.py

class OpenMLLearningCurveTask(OpenMLClassificationTask):
    """Learning curve task on OpenML.

    Parameters
    ----------
    task_type_id : TaskType
        ID of the Learning Curve task.
    task_type : str
        Name of the Learning Curve task.
    data_set_id : int
        ID of the dataset that this task is associated with.
    target_name : str
        Name of the target feature in the dataset.
    estimation_procedure_id : int, default=13
        ID of the estimation procedure to use for evaluating models.
    estimation_procedure_type : str, default=None
        Type of the estimation procedure.
    estimation_parameters : dict, default=None
        Additional parameters for the estimation procedure.
    data_splits_url : str, default=None
        URL of the file containing the data splits for Learning Curve task.
    task_id : Union[int, None], default=None
        ID of the Learning Curve task.
    evaluation_measure : str, default=None
        Name of the evaluation measure to use for evaluating models.
    class_labels : list of str, default=None
        Class labels for Learning Curve tasks.
    cost_matrix : numpy array, default=None
        Cost matrix for Learning Curve tasks.
    """

    def __init__(  # noqa: PLR0913
        self,
        task_type_id: TaskType,
        task_type: str,
        data_set_id: int,
        target_name: str,
        estimation_procedure_id: int = 13,
        estimation_procedure_type: str | None = None,
        estimation_parameters: dict[str, str] | None = None,
        data_splits_url: str | None = None,
        task_id: int | None = None,
        evaluation_measure: str | None = None,
        class_labels: list[str] | None = None,
        cost_matrix: np.ndarray | None = None,
    ):
        # All arguments are handed to the classification task unchanged; this
        # class only differs in the default estimation procedure ID.
        super().__init__(
            task_type_id=task_type_id,
            task_type=task_type,
            data_set_id=data_set_id,
            target_name=target_name,
            estimation_procedure_id=estimation_procedure_id,
            estimation_procedure_type=estimation_procedure_type,
            estimation_parameters=estimation_parameters,
            data_splits_url=data_splits_url,
            task_id=task_id,
            evaluation_measure=evaluation_measure,
            class_labels=class_labels,
            cost_matrix=cost_matrix,
        )

`OpenMLRegressionTask` ¶

Bases: OpenMLSupervisedTask

OpenML Regression object.

Parameters:

Name	Type	Description	Default
`task_type_id`	`TaskType`	Task type ID of the OpenML Regression task.	required
`task_type`	`str`	Task type of the OpenML Regression task.	required
`data_set_id`	`int`	ID of the OpenML dataset.	required
`target_name`	`str`	Name of the target feature used in the Regression task.	required
`estimation_procedure_id`	`int`	ID of the OpenML estimation procedure.	`7`
`estimation_procedure_type`	`str`	Type of the OpenML estimation procedure.	`None`
`estimation_parameters`	`dict`	Parameters used by the OpenML estimation procedure.	`None`
`data_splits_url`	`str`	URL of the OpenML data splits for the Regression task.	`None`
`task_id`	`Union[int, None]`	ID of the OpenML Regression task.	`None`
`evaluation_measure`	`str`	Evaluation measure used in the Regression task.	`None`

Source code in openml/tasks/task.py

class OpenMLRegressionTask(OpenMLSupervisedTask):
    """Supervised regression task on OpenML.

    Parameters
    ----------
    task_type_id : TaskType
        Task type ID of the OpenML Regression task.
    task_type : str
        Task type of the OpenML Regression task.
    data_set_id : int
        ID of the OpenML dataset.
    target_name : str
        Name of the target feature used in the Regression task.
    estimation_procedure_id : int, default=7
        ID of the OpenML estimation procedure.
    estimation_procedure_type : str, default=None
        Type of the OpenML estimation procedure.
    estimation_parameters : dict, default=None
        Parameters used by the OpenML estimation procedure.
    data_splits_url : str, default=None
        URL of the OpenML data splits for the Regression task.
    task_id : Union[int, None], default=None
        ID of the OpenML Regression task.
    evaluation_measure : str, default=None
        Evaluation measure used in the Regression task.
    """

    def __init__(  # noqa: PLR0913
        self,
        task_type_id: TaskType,
        task_type: str,
        data_set_id: int,
        target_name: str,
        estimation_procedure_id: int = 7,
        estimation_procedure_type: str | None = None,
        estimation_parameters: dict[str, str] | None = None,
        data_splits_url: str | None = None,
        task_id: int | None = None,
        evaluation_measure: str | None = None,
    ):
        # All arguments are handed to the supervised-task base class unchanged;
        # this class only differs in the default estimation procedure ID.
        super().__init__(
            task_type_id=task_type_id,
            task_type=task_type,
            data_set_id=data_set_id,
            target_name=target_name,
            estimation_procedure_id=estimation_procedure_id,
            estimation_procedure_type=estimation_procedure_type,
            estimation_parameters=estimation_parameters,
            data_splits_url=data_splits_url,
            task_id=task_id,
            evaluation_measure=evaluation_measure,
        )

`OpenMLSplit` ¶

OpenML Split object.

Parameters:

Name	Type	Default
`name`	`int or str`	required
`description`	`str`	required
`split`	`dict`	required

Source code in openml/tasks/split.py

class OpenMLSplit:
    """OpenML Split object.

    The splits are kept in ``self.split`` as nested mappings
    ``repetition -> fold -> sample -> (train, test)``.

    Parameters
    ----------
    name : int or str
        Name of the split.
    description : str
        Description of the split.
    split : dict
        Nested mapping ``repetition -> fold -> sample -> (train, test)``.
        Repetition keys are converted with ``int``; the nested mappings are
        copied, the ``(train, test)`` values are stored as given.

    Raises
    ------
    ValueError
        If not every repetition has the same number of folds as repetition 0.
    """

    def __init__(
        self,
        name: int | str,
        description: str,
        split: dict[int, dict[int, dict[int, tuple[np.ndarray, np.ndarray]]]],
    ):
        self.description = description
        self.name = name
        self.split: dict[int, dict[int, dict[int, tuple[np.ndarray, np.ndarray]]]] = {}

        # Add splits according to repetition. The inner mappings are looked up
        # through the original key, so repetition keys that are not already
        # ints (e.g. "0") are handled as well.
        for repetition, folds in split.items():
            self.split[int(repetition)] = OrderedDict(
                (fold, OrderedDict(samples)) for fold, samples in folds.items()
            )

        self.repeats = len(self.split)

        if any(len(self.split[0]) != len(self.split[i]) for i in range(self.repeats)):
            fold_counts = {rep: len(folds) for rep, folds in self.split.items()}
            raise ValueError(
                "All repetitions must have the same number of folds, "
                f"got folds per repetition: {fold_counts}",
            )

        self.folds = len(self.split[0])
        self.samples = len(self.split[0][0])

    def __eq__(self, other: Any) -> bool:
        """Return True if `other` is a split with the same name, description and indices."""
        if not isinstance(other, OpenMLSplit):
            return False
        if self.name != other.name or self.description != other.description:
            return False
        if self.split.keys() != other.split.keys():
            return False

        for repetition, folds in self.split.items():
            other_folds = other.split[repetition]
            if folds.keys() != other_folds.keys():
                return False
            for fold, samples in folds.items():
                other_samples = other_folds[fold]
                if samples.keys() != other_samples.keys():
                    return False
                for sample, (self_train, self_test) in samples.items():
                    other_train, other_test = other_samples[sample]
                    # array_equal also compares shapes, so index arrays of
                    # different length are unequal instead of being broadcast.
                    if not (
                        np.array_equal(self_train, other_train)
                        and np.array_equal(self_test, other_test)
                    ):
                        return False
        return True

    @classmethod
    def _from_arff_file(cls, filename: Path) -> OpenMLSplit:  # noqa: C901, PLR0912
        """Build a split from an ARFF file, using a pickle cache next to it.

        If a file with the same name but suffix ``.pkl.py3`` exists it is
        loaded instead of the ARFF file. Otherwise the ARFF file is parsed and
        the result is written to that pickle file.

        Parameters
        ----------
        filename : Path
            Path of the ARFF file describing the splits.

        Returns
        -------
        OpenMLSplit
            Split named after the ARFF relation, with an empty description.

        Raises
        ------
        FileNotFoundError
            If neither usable cache nor the ARFF file exists.
        ValueError
            If a row has a type other than ``"TRAIN"`` or ``"TEST"``.
        """
        repetitions = None
        name = None

        pkl_filename = filename.with_suffix(".pkl.py3")

        if pkl_filename.exists():
            with pkl_filename.open("rb") as fh:
                # NOTE(review): unpickling executes arbitrary code; this is only
                # safe as long as the cache file is written by this method.
                # TODO(eddiebergman): Would be good to figure out what _split is and assert it is
                _split = pickle.load(fh)  # noqa: S301
            repetitions = _split["repetitions"]
            name = _split["name"]

        # Cache miss
        if repetitions is None:
            if not filename.exists():
                raise FileNotFoundError(f"Split arff {filename} does not exist!")

            repetitions = OrderedDict()

            # With DENSE_GEN the rows are presumably read lazily from the file
            # handle (see liac-arff docs), so the file stays open until all
            # rows have been consumed and is closed afterwards.
            with filename.open("r") as arff_fh:
                file_data = arff.load(arff_fh, return_type=arff.DENSE_GEN)
                name = file_data["relation"]
                attrnames = [attr[0] for attr in file_data["attributes"]]

                type_idx = attrnames.index("type")
                rowid_idx = attrnames.index("rowid")
                repeat_idx = attrnames.index("repeat")
                fold_idx = attrnames.index("fold")
                sample_idx = attrnames.index("sample") if "sample" in attrnames else None

                for line in file_data["data"]:
                    # A line looks like type, rowid, repeat, fold
                    repetition = int(line[repeat_idx])
                    fold = int(line[fold_idx])
                    sample = 0 if sample_idx is None else int(line[sample_idx])

                    # (train row ids, test row ids) collected so far
                    split = (
                        repetitions.setdefault(repetition, OrderedDict())
                        .setdefault(fold, OrderedDict())
                        .setdefault(sample, ([], []))
                    )

                    type_ = line[type_idx]
                    if type_ == "TRAIN":
                        split[0].append(line[rowid_idx])
                    elif type_ == "TEST":
                        split[1].append(line[rowid_idx])
                    else:
                        raise ValueError(type_)

            # Replace the collected lists by int32 arrays.
            for folds in repetitions.values():
                for samples in folds.values():
                    for sample, (train, test) in samples.items():
                        samples[sample] = Split(
                            np.array(train, dtype=np.int32),
                            np.array(test, dtype=np.int32),
                        )

            with pkl_filename.open("wb") as fh:
                pickle.dump({"name": name, "repetitions": repetitions}, fh, protocol=2)

        assert name is not None
        return cls(name, "", repetitions)

    def get(self, repeat: int = 0, fold: int = 0, sample: int = 0) -> tuple[np.ndarray, np.ndarray]:
        """Return the split stored for the given repeat, fold and sample.

        Parameters
        ----------
        repeat : int
            Index of the repeat to retrieve.
        fold : int
            Index of the fold to retrieve.
        sample : int
            Index of the sample to retrieve.

        Returns
        -------
        tuple of numpy.ndarray
            The ``(train, test)`` entry for the specified repeat, fold, and sample.

        Raises
        ------
        ValueError
            If the specified repeat, fold, or sample is not known.
        """
        if repeat not in self.split:
            raise ValueError(f"Repeat {repeat!s} not known")
        if fold not in self.split[repeat]:
            raise ValueError(f"Fold {fold!s} not known")
        if sample not in self.split[repeat][fold]:
            raise ValueError(f"Sample {sample!s} not known")
        return self.split[repeat][fold][sample]

`get(repeat=0, fold=0, sample=0)` ¶

Returns the specified data split from the CrossValidationSplit object.

Parameters:

Name	Type	Description	Default
`repeat`	`int`	Index of the repeat to retrieve.	`0`
`fold`	`int`	Index of the fold to retrieve.	`0`
`sample`	`int`	Index of the sample to retrieve.	`0`

Returns:

Type	Description
`tuple[ndarray, ndarray]`	The `(train, test)` pair stored for the specified repeat, fold, and sample.

Raises:

Type	Description
`ValueError`	If the specified repeat, fold, or sample is not known.

Source code in openml/tasks/split.py

def get(self, repeat: int = 0, fold: int = 0, sample: int = 0) -> tuple[np.ndarray, np.ndarray]:
    """Look up the split stored for one repeat / fold / sample combination.

    Parameters
    ----------
    repeat : int
        Index of the repeat to retrieve.
    fold : int
        Index of the fold to retrieve.
    sample : int
        Index of the sample to retrieve.

    Returns
    -------
    tuple of numpy.ndarray
        The entry of ``self.split`` for the specified repeat, fold, and sample.

    Raises
    ------
    ValueError
        If the specified repeat, fold, or sample is not known.
    """
    # Descend one nesting level at a time: repeat -> fold -> sample.
    level = self.split
    for label, key in (("Repeat", repeat), ("Fold", fold), ("Sample", sample)):
        if key not in level:
            raise ValueError(f"{label} {key!s} not known")
        level = level[key]
    return level

`OpenMLSupervisedTask` ¶

Bases: OpenMLTask, ABC

OpenML Supervised Classification object.

Parameters:

Name	Type	Description	Default
`task_type_id`	`TaskType`	ID of the task type.	required
`task_type`	`str`	Name of the task type.	required
`data_set_id`	`int`	ID of the OpenML dataset associated with the task.	required
`target_name`	`str`	Name of the target feature (the class variable).	required
`estimation_procedure_id`	`int`	ID of the estimation procedure for the task.	`1`
`estimation_procedure_type`	`str`	Type of the estimation procedure for the task.	`None`
`estimation_parameters`	`dict`	Estimation parameters for the task.	`None`
`evaluation_measure`	`str`	Name of the evaluation measure for the task.	`None`
`data_splits_url`	`str`	URL of the data splits for the task.	`None`
`task_id`	`int \| None`	Refers to the unique identifier of task.	`None`

Source code in openml/tasks/task.py

class OpenMLSupervisedTask(OpenMLTask, ABC):
    """Base class for supervised OpenML tasks, i.e. tasks with a target feature.

    Parameters
    ----------
    task_type_id : TaskType
        ID of the task type.
    task_type : str
        Name of the task type.
    data_set_id : int
        ID of the OpenML dataset associated with the task.
    target_name : str
        Name of the target feature (the class variable).
    estimation_procedure_id : int, default=1
        ID of the estimation procedure for the task.
    estimation_procedure_type : str, default=None
        Type of the estimation procedure for the task.
    estimation_parameters : dict, default=None
        Estimation parameters for the task.
    evaluation_measure : str, default=None
        Name of the evaluation measure for the task.
    data_splits_url : str, default=None
        URL of the data splits for the task.
    task_id : Union[int, None], default=None
        Refers to the unique identifier of task.
    """

    def __init__(  # noqa: PLR0913
        self,
        task_type_id: TaskType,
        task_type: str,
        data_set_id: int,
        target_name: str,
        estimation_procedure_id: int = 1,
        estimation_procedure_type: str | None = None,
        estimation_parameters: dict[str, str] | None = None,
        evaluation_measure: str | None = None,
        data_splits_url: str | None = None,
        task_id: int | None = None,
    ):
        super().__init__(
            task_id=task_id,
            task_type_id=task_type_id,
            task_type=task_type,
            data_set_id=data_set_id,
            estimation_procedure_id=estimation_procedure_id,
            estimation_procedure_type=estimation_procedure_type,
            estimation_parameters=estimation_parameters,
            evaluation_measure=evaluation_measure,
            data_splits_url=data_splits_url,
        )

        self.target_name = target_name

    @overload
    def get_X_and_y(
        self, dataset_format: Literal["array"] = "array"
    ) -> tuple[
        np.ndarray | scipy.sparse.spmatrix,
        np.ndarray | None,
    ]:
        ...

    @overload
    def get_X_and_y(
        self, dataset_format: Literal["dataframe"]
    ) -> tuple[
        pd.DataFrame,
        pd.Series | pd.DataFrame | None,
    ]:
        ...

    # TODO(eddiebergman): Do all OpenMLSupervisedTask have a `y`?
    def get_X_and_y(
        self, dataset_format: Literal["dataframe", "array"] = "array"
    ) -> tuple[
        np.ndarray | pd.DataFrame | scipy.sparse.spmatrix,
        np.ndarray | pd.Series | pd.DataFrame | None,
    ]:
        """Get data associated with the current task.

        Parameters
        ----------
        dataset_format : str
            Data structure of the returned data. See :meth:`openml.datasets.OpenMLDataset.get_data`
            for possible options.

        Returns
        -------
        tuple - X and y

        Raises
        ------
        NotImplementedError
            If the task type is not supervised classification, supervised
            regression or learning curve.
        """
        # TODO: [0.15]
        if dataset_format == "array":
            warnings.warn(
                "Support for `dataset_format='array'` will be removed in 0.15, "
                "start using `dataset_format='dataframe'` to ensure your code "
                "will continue to work. You can use the dataframe's `to_numpy` "
                "function to continue using numpy arrays.",
                category=FutureWarning,
                stacklevel=2,
            )

        # Reject unsupported task types before the dataset is fetched.
        supported_task_types = (
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
        )
        if self.task_type_id not in supported_task_types:
            raise NotImplementedError(self.task_type)

        dataset = self.get_dataset()
        X, y, _, _ = dataset.get_data(
            dataset_format=dataset_format,
            target=self.target_name,
        )
        return X, y

    def _to_dict(self) -> dict[str, dict]:
        """Return the parent's dictionary representation with the target feature added."""
        task_container = super()._to_dict()
        oml_input = task_container["oml:task_inputs"]["oml:input"]  # type: ignore
        assert isinstance(oml_input, list)

        oml_input.append({"@name": "target_feature", "#text": self.target_name})
        return task_container

    @property
    def estimation_parameters(self) -> dict[str, str] | None:
        """Return the estimation parameters for the task."""
        warnings.warn(
            "The estimation_parameters attribute will be "
            "deprecated in the future, please use "
            "estimation_procedure['parameters'] instead",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self.estimation_procedure["parameters"]

    @estimation_parameters.setter
    def estimation_parameters(self, est_parameters: dict[str, str] | None) -> None:
        # Stored inside the estimation procedure dict; setting emits no warning.
        self.estimation_procedure["parameters"] = est_parameters

`estimation_parameters: dict[str, str] | None` `property` `writable` ¶

Return the estimation parameters for the task.

`get_X_and_y(dataset_format='array')` ¶

get_X_and_y(dataset_format: Literal['array'] = 'array') -> tuple[np.ndarray | scipy.sparse.spmatrix, np.ndarray | None]

get_X_and_y(dataset_format: Literal['dataframe']) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]

Get data associated with the current task.

Parameters:

Name	Type	Description	Default
`dataset_format`	`str`	Data structure of the returned data. See :meth:`openml.datasets.OpenMLDataset.get_data` for possible options.	`'array'`

Returns:

Type	Description
`tuple - X and y`

Source code in openml/tasks/task.py

def get_X_and_y(
    self, dataset_format: Literal["dataframe", "array"] = "array"
) -> tuple[
    np.ndarray | pd.DataFrame | scipy.sparse.spmatrix,
    np.ndarray | pd.Series | pd.DataFrame | None,
]:
    """Get data associated with the current task.

    Parameters
    ----------
    dataset_format : str
        Data structure of the returned data. See :meth:`openml.datasets.OpenMLDataset.get_data`
        for possible options.

    Returns
    -------
    tuple - X and y

    Raises
    ------
    NotImplementedError
        If the task type is not supervised classification, supervised
        regression or learning curve.
    """
    # TODO: [0.15]
    if dataset_format == "array":
        warnings.warn(
            "Support for `dataset_format='array'` will be removed in 0.15, "
            "start using `dataset_format='dataframe'` to ensure your code "
            "will continue to work. You can use the dataframe's `to_numpy` "
            "function to continue using numpy arrays.",
            category=FutureWarning,
            stacklevel=2,
        )

    # Reject unsupported task types before the dataset is fetched.
    supported_task_types = (
        TaskType.SUPERVISED_CLASSIFICATION,
        TaskType.SUPERVISED_REGRESSION,
        TaskType.LEARNING_CURVE,
    )
    if self.task_type_id not in supported_task_types:
        raise NotImplementedError(self.task_type)

    dataset = self.get_dataset()
    X, y, _, _ = dataset.get_data(
        dataset_format=dataset_format,
        target=self.target_name,
    )
    return X, y

`OpenMLTask` ¶

Bases: OpenMLBase

OpenML Task object.

Parameters:

Name	Type	Description	Default
`task_id`	`int \| None`	Refers to the unique identifier of OpenML task.	required
`task_type_id`	`TaskType`	Refers to the type of OpenML task.	required
`task_type`	`str`	Refers to the OpenML task.	required
`data_set_id`	`int`	Refers to the data.	required
`estimation_procedure_id`	`int`	Refers to the type of estimates used.	`1`
`estimation_procedure_type`	`str \| None`	Refers to the type of estimation procedure used for the OpenML task.	`None`
`estimation_parameters`	`dict[str, str] \| None`	Estimation parameters used for the OpenML task.	`None`
`evaluation_measure`	`str \| None`	Refers to the evaluation measure.	`None`
`data_splits_url`	`str \| None`	Refers to the URL of the data splits used for the OpenML task.	`None`

Source code in openml/tasks/task.py

class OpenMLTask(OpenMLBase):
    """OpenML Task object.

    Parameters
    ----------
    task_id : int or None
        Unique identifier of the OpenML task. ``None`` is accepted and stored as-is;
        ``_parse_publish_response`` sets it from the server's upload response.
    task_type_id : TaskType
        Type of the OpenML task.
    task_type : str
        Name of the task type.
    data_set_id : int
        ID of the dataset the task refers to. Stored on the instance as ``dataset_id``.
    estimation_procedure_id : int, default=1
        ID of the estimation procedure used.
    estimation_procedure_type : str, default=None
        Type of the estimation procedure used for the OpenML task.
    estimation_parameters : dict[str, str], default=None
        Estimation parameters used for the OpenML task.
    evaluation_measure : str, default=None
        Name of the evaluation measure.
    data_splits_url : str, default=None
        URL of the data splits used for the OpenML task.
    """

    def __init__(  # noqa: PLR0913
        self,
        task_id: int | None,
        task_type_id: TaskType,
        task_type: str,
        data_set_id: int,
        estimation_procedure_id: int = 1,
        estimation_procedure_type: str | None = None,
        estimation_parameters: dict[str, str] | None = None,
        evaluation_measure: str | None = None,
        data_splits_url: str | None = None,
    ):
        self.task_id = int(task_id) if task_id is not None else None
        self.task_type_id = task_type_id
        self.task_type = task_type
        self.dataset_id = int(data_set_id)
        self.evaluation_measure = evaluation_measure
        # The three estimation-procedure details are grouped in a single mapping.
        self.estimation_procedure: _EstimationProcedure = {
            "type": estimation_procedure_type,
            "parameters": estimation_parameters,
            "data_splits_url": data_splits_url,
        }
        self.estimation_procedure_id = estimation_procedure_id
        # Filled lazily by the split accessors below.
        self.split: OpenMLSplit | None = None

    @classmethod
    def _entity_letter(cls) -> str:
        """Return the letter that identifies task entities (``"t"``)."""
        return "t"

    @property
    def id(self) -> int | None:
        """Return the OpenML ID of this task."""
        return self.task_id

    def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]:
        """Collect all information to display in the __repr__ body.

        Returns
        -------
        list of (name, value) tuples
            Only the fields that apply to this task, in a fixed display order.
        """
        base_server_url = openml.config.get_server_base_url()
        # ``task_type_id`` is a ``TaskType`` member; interpolating the member itself would
        # produce ".../tt/TaskType.<NAME>". Use its integer value, as ``_to_dict`` does.
        # ``getattr`` keeps a plain integer working should one have been passed instead.
        task_type_id = getattr(self.task_type_id, "value", self.task_type_id)
        fields: dict[str, Any] = {
            "Task Type Description": f"{base_server_url}/tt/{task_type_id}"
        }
        if self.task_id is not None:
            fields["Task ID"] = self.task_id
            fields["Task URL"] = self.openml_url
        if self.evaluation_measure is not None:
            fields["Evaluation Measure"] = self.evaluation_measure
        # NOTE(review): ``estimation_procedure`` is always a dict after ``__init__``, so this
        # check is always true and the displayed type may be ``None``.
        if self.estimation_procedure is not None:
            fields["Estimation Procedure"] = self.estimation_procedure["type"]

        # TODO(eddiebergman): Subclasses could advertise/provide this, instead of having to
        # have the base class know about its subclasses.
        target_name = getattr(self, "target_name", None)
        if target_name is not None:
            fields["Target Feature"] = target_name

            class_labels = getattr(self, "class_labels", None)
            if class_labels is not None:
                fields["# of Classes"] = len(class_labels)

            # NOTE(review): only the presence of the attribute is checked, not whether it
            # is ``None`` - confirm that "Available" is intended in that case.
            if hasattr(self, "cost_matrix"):
                fields["Cost Matrix"] = "Available"

        # determines the order in which the information will be printed
        order = [
            "Task Type Description",
            "Task ID",
            "Task URL",
            "Estimation Procedure",
            "Evaluation Measure",
            "Target Feature",
            "# of Classes",
            "Cost Matrix",
        ]
        return [(key, fields[key]) for key in order if key in fields]

    def get_dataset(self) -> datasets.OpenMLDataset:
        """Fetch the dataset this task refers to.

        Returns
        -------
        datasets.OpenMLDataset
            Result of ``datasets.get_dataset`` for ``self.dataset_id``.
        """
        dataset_id = self.dataset_id
        return datasets.get_dataset(dataset_id)

    def get_train_test_split_indices(
        self,
        fold: int = 0,
        repeat: int = 0,
        sample: int = 0,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Get the indices of the train and test splits for a given task.

        The split is obtained through :meth:`download_split` on first use and kept on
        ``self.split`` afterwards.

        Parameters
        ----------
        fold : int, default=0
            Fold to select.
        repeat : int, default=0
            Repeat to select.
        sample : int, default=0
            Sample to select.

        Returns
        -------
        tuple of np.ndarray
            Whatever ``self.split.get`` returns for the requested repeat, fold and sample.
        """
        # Replace with retrieve from cache
        split = self.split
        if split is None:
            split = self.split = self.download_split()

        return split.get(repeat=repeat, fold=fold, sample=sample)

    def _download_split(self, cache_file: Path) -> None:
        """Download the split file to ``cache_file`` unless that file can already be opened.

        Parameters
        ----------
        cache_file : Path
            Location at which the split file is expected or should be written.
        """
        # TODO(eddiebergman): Not sure about this try to read and error approach
        try:
            # Opening the file is only a probe for its presence and readability.
            with cache_file.open(encoding="utf8"):
                pass
        except OSError:
            split_url = self.estimation_procedure["data_splits_url"]
            openml._api_calls._download_text_file(
                source=str(split_url),
                output_path=str(cache_file),
            )

    def download_split(self) -> OpenMLSplit:
        """Download the OpenML split for a given task.

        The cached ``datasplits.arff`` file of this task is parsed first; only if that
        raises ``OSError`` is the file downloaded and parsed again.

        Returns
        -------
        OpenMLSplit
            The split parsed from the task's ``datasplits.arff`` file.
        """
        # TODO(eddiebergman): Can this ever be `None`?
        assert self.task_id is not None
        split_file = _create_cache_directory_for_id("tasks", self.task_id) / "datasplits.arff"

        try:
            return OpenMLSplit._from_arff_file(split_file)
        except OSError:
            # Next, download and cache the associated split file
            self._download_split(split_file)

        return OpenMLSplit._from_arff_file(split_file)

    def get_split_dimensions(self) -> tuple[int, int, int]:
        """Get the (repeats, folds, samples) of the split for a given task.

        The split is obtained through :meth:`download_split` on first use and kept on
        ``self.split`` afterwards.
        """
        if self.split is None:
            self.split = self.download_split()

        split = self.split
        return split.repeats, split.folds, split.samples

    # TODO(eddiebergman): Really need some better typing on all this
    def _to_dict(self) -> dict[str, dict[str, int | str | list[dict[str, Any]]]]:
        """Creates a dictionary representation of self in a string format (for XML parsing).

        Returns
        -------
        dict
            A single ``"oml:task_inputs"`` entry holding the namespace, the task type ID and
            the list of inputs (source data, estimation procedure and, if set, the
            evaluation measure).
        """
        oml_input = [
            {"@name": "source_data", "#text": str(self.dataset_id)},
            {"@name": "estimation_procedure", "#text": str(self.estimation_procedure_id)},
        ]
        # The evaluation measure is optional and only included when one was given.
        if self.evaluation_measure is not None:
            oml_input.append({"@name": "evaluation_measures", "#text": self.evaluation_measure})

        return {
            "oml:task_inputs": {
                "@xmlns:oml": "http://openml.org/openml",
                # Integer value of the ``TaskType`` member.
                "oml:task_type_id": self.task_type_id.value,
                "oml:input": oml_input,
            }
        }

    def _parse_publish_response(self, xml_response: dict) -> None:
        """Parse the id from the xml_response and assign it to self."""
        self.task_id = int(xml_response["oml:upload_task"]["oml:id"])

`id: int | None` `property` ¶

Return the OpenML ID of this task.

`download_split()` ¶

Download the OpenML split for a given task.

Source code in openml/tasks/task.py

def download_split(self) -> OpenMLSplit:
    """Download the OpenML split for a given task.

    The cached ``datasplits.arff`` file of this task is parsed first; only if that
    raises ``OSError`` is the file downloaded and parsed again.

    Returns
    -------
    OpenMLSplit
        The split parsed from the task's ``datasplits.arff`` file.
    """
    # TODO(eddiebergman): Can this ever be `None`?
    assert self.task_id is not None
    split_file = _create_cache_directory_for_id("tasks", self.task_id) / "datasplits.arff"

    try:
        return OpenMLSplit._from_arff_file(split_file)
    except OSError:
        # Next, download and cache the associated split file
        self._download_split(split_file)

    return OpenMLSplit._from_arff_file(split_file)

`get_dataset()` ¶

Download dataset associated with task.

Source code in openml/tasks/task.py

def get_dataset(self) -> datasets.OpenMLDataset:
    """Fetch the dataset this task refers to.

    Returns
    -------
    datasets.OpenMLDataset
        Result of ``datasets.get_dataset`` for ``self.dataset_id``.
    """
    dataset_id = self.dataset_id
    return datasets.get_dataset(dataset_id)

`get_split_dimensions()` ¶

Get the (repeats, folds, samples) of the split for a given task.

Source code in openml/tasks/task.py

def get_split_dimensions(self) -> tuple[int, int, int]:
    """Get the (repeats, folds, samples) of the split for a given task.

    The split is obtained through ``download_split`` on first use and kept on
    ``self.split`` afterwards.
    """
    if self.split is None:
        self.split = self.download_split()

    split = self.split
    return split.repeats, split.folds, split.samples

`get_train_test_split_indices(fold=0, repeat=0, sample=0)` ¶

Get the indices of the train and test splits for a given task.

Source code in openml/tasks/task.py

def get_train_test_split_indices(
    self,
    fold: int = 0,
    repeat: int = 0,
    sample: int = 0,
) -> tuple[np.ndarray, np.ndarray]:
    """Get the indices of the train and test splits for a given task.

    The split is obtained through ``download_split`` on first use and kept on
    ``self.split`` afterwards.

    Parameters
    ----------
    fold : int, default=0
        Fold to select.
    repeat : int, default=0
        Repeat to select.
    sample : int, default=0
        Sample to select.

    Returns
    -------
    tuple of np.ndarray
        Whatever ``self.split.get`` returns for the requested repeat, fold and sample.
    """
    # Replace with retrieve from cache
    split = self.split
    if split is None:
        split = self.split = self.download_split()

    return split.get(repeat=repeat, fold=fold, sample=sample)

`TaskType` ¶

Bases: Enum

Possible task types as defined in OpenML.

Source code in openml/tasks/task.py

class TaskType(Enum):
    """Possible task types as defined in OpenML.

    Each member pairs a task type name with an integer value; presumably these are the
    task type IDs used by the OpenML server (verify against the server's task type list).
    """

    SUPERVISED_CLASSIFICATION = 1
    SUPERVISED_REGRESSION = 2
    LEARNING_CURVE = 3
    SUPERVISED_DATASTREAM_CLASSIFICATION = 4
    CLUSTERING = 5
    MACHINE_LEARNING_CHALLENGE = 6
    SURVIVAL_ANALYSIS = 7
    SUBGROUP_DISCOVERY = 8
    MULTITASK_REGRESSION = 9

`create_task(task_type, dataset_id, estimation_procedure_id, target_name=None, evaluation_measure=None, **kwargs)` ¶

Create a task based on different given attributes.

Builds a task object with the function arguments as attributes. The type of the task object built is determined from the task type id. More information on how the arguments (task attributes), relate to the different possible tasks can be found in the individual task objects at the openml.tasks.task module.

Parameters:

Name	Type	Description	Default
`task_type`	`TaskType`	Id of the task type.	required
`dataset_id`	`int`	The id of the dataset for the task.	required
`target_name`	`str`	The name of the feature used as a target. At the moment, only optional for the clustering tasks.	`None`
`estimation_procedure_id`	`int`	The id of the estimation procedure.	required
`evaluation_measure`	`str`	The name of the evaluation measure.	`None`
`kwargs`	`dict`	Other task attributes that are not mandatory for task upload.	`{}`

Returns:

Type	Description
`OpenMLClassificationTask \| OpenMLRegressionTask \| OpenMLLearningCurveTask \| OpenMLClusteringTask`	The created task object; its class is determined by `task_type`.

Source code in openml/tasks/functions.py

def create_task(
    task_type: TaskType,
    dataset_id: int,
    estimation_procedure_id: int,
    target_name: str | None = None,
    evaluation_measure: str | None = None,
    **kwargs: Any,
) -> (
    OpenMLClassificationTask | OpenMLRegressionTask | OpenMLLearningCurveTask | OpenMLClusteringTask
):
    """Create a task based on different given attributes.

    Builds a task object with the function arguments as
    attributes. The type of the task object built is
    determined from the task type id.
    More information on how the arguments (task attributes),
    relate to the different possible tasks can be found in
    the individual task objects at the openml.tasks.task
    module.

    Parameters
    ----------
    task_type : TaskType
        Id of the task type.
    dataset_id : int
        The id of the dataset for the task.
    estimation_procedure_id : int
        The id of the estimation procedure.
    target_name : str, optional
        The name of the feature used as a target.
        At the moment, only optional for the clustering tasks.
    evaluation_measure : str, optional
        The name of the evaluation measure.
    kwargs : dict, optional
        Other task attributes that are not mandatory
        for task upload.

    Returns
    -------
    OpenMLClassificationTask, OpenMLRegressionTask,
    OpenMLLearningCurveTask, OpenMLClusteringTask

    Raises
    ------
    NotImplementedError
        If ``task_type`` is not one of the clustering, learning curve, supervised
        classification or supervised regression task types.
    """
    if task_type == TaskType.CLUSTERING:
        task_cls = OpenMLClusteringTask
    elif task_type == TaskType.LEARNING_CURVE:
        task_cls = OpenMLLearningCurveTask  # type: ignore
    elif task_type == TaskType.SUPERVISED_CLASSIFICATION:
        task_cls = OpenMLClassificationTask  # type: ignore
    elif task_type == TaskType.SUPERVISED_REGRESSION:
        task_cls = OpenMLRegressionTask  # type: ignore
    else:
        # A plain ``Enum`` member does not accept the ``d`` format code, so formatting the
        # member with ``:d`` raised ``ValueError`` and masked the intended error. Report the
        # member's integer value instead; a raw integer argument is reported as-is.
        unsupported_id = getattr(task_type, "value", task_type)
        raise NotImplementedError(f"Task type {unsupported_id} not supported.")

    return task_cls(
        task_type_id=task_type,
        task_type="None",  # TODO: refactor to get task type string from ID.
        data_set_id=dataset_id,
        target_name=target_name,
        estimation_procedure_id=estimation_procedure_id,
        evaluation_measure=evaluation_measure,
        **kwargs,
    )

`delete_task(task_id)` ¶

Delete task with id task_id from the OpenML server.

You can only delete tasks which you created and have no runs associated with them.

Parameters:

Name	Type	Description	Default
`task_id`	`int`	OpenML id of the task	required

Returns:

Type	Description
`bool`	True if the deletion was successful. False otherwise.

Source code in openml/tasks/functions.py

def delete_task(task_id: int) -> bool:
    """Delete the task with id `task_id` from the OpenML server.

    Only tasks that you created yourself and that have no runs
    associated with them can be deleted.

    Parameters
    ----------
    task_id : int
        OpenML id of the task

    Returns
    -------
    bool
        True if the deletion was successful. False otherwise.
    """
    was_deleted = openml.utils._delete_entity("task", task_id)
    return was_deleted

`get_task(task_id, *dataset_args, download_splits=None, **get_dataset_kwargs)` ¶

Download OpenML task for a given task ID.

Downloads the task representation. By default, this will also download the data splits and the dataset. From version 0.15.0 onwards, neither the splits nor the dataset will be downloaded by default.

Use the download_splits parameter to control whether the splits are downloaded. Moreover, you may pass additional parameters (args or kwargs) that are passed to :meth:openml.datasets.get_dataset. For backwards compatibility, if download_data is passed as an additional parameter and download_splits is not explicitly set, download_data also overrules download_splits's value (deprecated from Version 0.15.0 onwards).

Parameters:

Name	Type	Description	Default
`task_id`	`int`	The OpenML task id of the task to download.	required
`download_splits`	`bool \| None`	Whether to download the splits as well. From version 0.15.0 onwards this is independent of download_data and will default to `False`.	`None`
`dataset_args`	`Any`	Args and kwargs can be used to pass optional parameters to :meth:`openml.datasets.get_dataset`. This includes `download_data`. If set to True the splits are downloaded as well (deprecated from Version 0.15.0 onwards). The args are only present for backwards compatibility and will be removed from version 0.15.0 onwards.	`()`
`get_dataset_kwargs`	`Any`	Args and kwargs can be used to pass optional parameters to :meth:`openml.datasets.get_dataset`. This includes `download_data`. If set to True the splits are downloaded as well (deprecated from Version 0.15.0 onwards). The args are only present for backwards compatibility and will be removed from version 0.15.0 onwards.	`{}`

Returns:

Name	Type	Description
`task`	`OpenMLTask`

Source code in openml/tasks/functions.py

@openml.utils.thread_safe_if_oslo_installed
def get_task(
    task_id: int,
    *dataset_args: Any,
    download_splits: bool | None = None,
    **get_dataset_kwargs: Any,
) -> OpenMLTask:
    """Download OpenML task for a given task ID.

    Downloads the task representation. By default, this will also download the data splits and
    the dataset. From version 0.15.0 onwards, neither the splits nor the dataset will be
    downloaded by default.

    Use the `download_splits` parameter to control whether the splits are downloaded.
    Moreover, you may pass additional parameters (args or kwargs) that are passed to
    :meth:`openml.datasets.get_dataset`.
    For backwards compatibility, if `download_data` is passed as an additional parameter and
    `download_splits` is not explicitly set, `download_data` also overrules `download_splits`'s
    value (deprecated from Version 0.15.0 onwards).

    Parameters
    ----------
    task_id : int
        The OpenML task id of the task to download.
    download_splits : bool, optional (default=None)
        Whether to download the splits as well. If left at ``None``, a ``FutureWarning`` is
        issued and the value of `download_data` in `get_dataset_kwargs` is used (``True`` if
        it is absent). From version 0.15.0 onwards this is independent of `download_data` and
        will default to ``False``.
    dataset_args, get_dataset_kwargs :
        Args and kwargs can be used to pass optional parameters to
        :meth:`openml.datasets.get_dataset`.
        This includes `download_data`. If set to True the splits are downloaded as well
        (deprecated from Version 0.15.0 onwards). The args are only present for backwards
        compatibility and will be removed from version 0.15.0 onwards.

    Returns
    -------
    task: OpenMLTask

    Raises
    ------
    ValueError
        If `task_id` is not an integer and cannot be cast to one.
    """
    if download_splits is None:
        # TODO(0.15): Switch download splits to False by default, adjust typing above, adjust
        #  documentation above, and remove warning.
        warnings.warn(
            "Starting from Version 0.15.0 `download_splits` will default to ``False`` instead "
            "of ``True`` and be independent from `download_data`. To disable this message until "
            "version 0.15 explicitly set `download_splits` to a bool.",
            FutureWarning,
            stacklevel=3,
        )
        download_splits = get_dataset_kwargs.get("download_data", True)

    if not isinstance(task_id, int):
        # TODO(0.15): Remove warning
        warnings.warn(
            "Task id must be specified as `int` from 0.14.0 onwards.",
            FutureWarning,
            stacklevel=3,
        )

    try:
        task_id = int(task_id)
    except (ValueError, TypeError) as e:
        # The value being validated is a task ID (the message used to say "Dataset ID").
        raise ValueError("Task ID is neither an Integer nor can be cast to an Integer.") from e

    tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)

    try:
        task = _get_task_description(task_id)
        dataset = get_dataset(task.dataset_id, *dataset_args, **get_dataset_kwargs)
        # List of class labels available in dataset description
        # Including class labels as part of task meta data handles
        #   the case where data download was initially disabled
        if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
            task.class_labels = dataset.retrieve_class_labels(task.target_name)
        # Clustering tasks do not have class labels
        # and do not offer download_split
        if download_splits and isinstance(task, OpenMLSupervisedTask):
            task.download_split()
    except Exception:
        # Remove the task's cache directory on any failure, then re-raise the original
        # error with its traceback intact.
        openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
        raise

    return task

`get_tasks(task_ids, download_data=True, download_qualities=True)` ¶

Download tasks.

This function iterates :meth:openml.tasks.get_task.

Parameters:

Name	Type	Description	Default
`task_ids`	`List[int]`	A list of task ids to download.	required
`download_data`	`bool`	Option to trigger download of data along with the meta data.	`True`
`download_qualities`	`bool`	Option to download 'qualities' meta-data in addition to the minimal dataset description.	`True`

Returns:

Type	Description
`list`

Source code in openml/tasks/functions.py

def get_tasks(
    task_ids: list[int],
    download_data: bool = True,  # noqa: FBT001, FBT002
    download_qualities: bool = True,  # noqa: FBT001, FBT002
) -> list[OpenMLTask]:
    """Download tasks.

    This function iterates :meth:`openml.tasks.get_task`.

    Parameters
    ----------
    task_ids : List[int]
        A list of task ids to download.
    download_data : bool (default = True)
        Option to trigger download of data along with the meta data.
    download_qualities : bool (default=True)
        Option to download 'qualities' meta-data in addition to the minimal dataset description.

    Returns
    -------
    list
        The downloaded tasks, in the order of ``task_ids``.
    """
    # Forward both options by keyword. ``get_task`` hands its extra positional arguments to
    # ``get_dataset`` in order, so a positional ``download_qualities`` would be bound to
    # whatever parameter follows ``download_data`` there instead of to ``download_qualities``.
    # Keywords also let ``get_task`` see ``download_data`` when it derives its
    # ``download_splits`` default.
    return [
        get_task(
            task_id,
            download_data=download_data,
            download_qualities=download_qualities,
        )
        for task_id in task_ids
    ]

`list_tasks(task_type=None, offset=None, size=None, tag=None, output_format='dict', **kwargs)` ¶

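Return a number of tasks having the given tag and task_type.

The task_type filter is separated from the other filters because it is used as task_type in the task description, but it is named type when used as a filter in the list tasks call.

Parameters:

Name	Type	Description	Default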
`task_type`	`TaskType`	Refers to the type of task.	`None`
`offset`	`int`	the number of tasks to skip, starting from the first	`None`
`size`	`int`	the maximum number of tasks to show	`None`
`tag`	`str`	the tag to include	`None`
`output_format`	`Literal['dict', 'dataframe']`	The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame	`'dict'`
`kwargs`	`Any`	Legal filter operators: data_tag, status, data_id, data_name, number_instances, number_features, number_classes, number_missing_values.	`{}`

Returns:

Type	Description
`dict`	All tasks having the given task_type and the given tag. Every task is represented by a dictionary containing the following information: task id, dataset id, task_type and status. If qualities are calculated for the associated dataset, some of these are also returned.
`dataframe`	All tasks having the given task_type and the given tag. Every task is represented by a row in the data frame containing the following information as columns: task id, dataset id, task_type and status. If qualities are calculated for the associated dataset, some of these are also returned.

Source code in openml/tasks/functions.py

def list_tasks(
    task_type: TaskType | None = None,
    offset: int | None = None,
    size: int | None = None,
    tag: str | None = None,
    output_format: Literal["dict", "dataframe"] = "dict",
    **kwargs: Any,
) -> dict | pd.DataFrame:
    """Return a number of tasks having the given tag and task_type.

    The task_type filter is separated from the other filters because
    it is used as task_type in the task description, but it is named
    type when used as a filter in the list tasks call.

    Parameters
    ----------
    task_type : TaskType, optional
        Refers to the type of task.
    offset : int, optional
        the number of tasks to skip, starting from the first
    size : int, optional
        the maximum number of tasks to show
    tag : str, optional
        the tag to include
    output_format: str, optional (default='dict')
        The parameter decides the format of the output.
        - If 'dict' the output is a dict of dict
        - If 'dataframe' the output is a pandas DataFrame
    kwargs: dict, optional
        Legal filter operators: data_tag, status, data_id, data_name,
        number_instances, number_features,
        number_classes, number_missing_values.

    Returns
    -------
    dict
        All tasks having the given task_type and the given tag. Every task is
        represented by a dictionary containing the following information:
        task id, dataset id, task_type and status. If qualities are calculated
        for the associated dataset, some of these are also returned.
    dataframe
        All tasks having the given task_type and the given tag. Every task is
        represented by a row in the data frame containing the following information
        as columns: task id, dataset id, task_type and status. If qualities are
        calculated for the associated dataset, some of these are also returned.

    Raises
    ------
    ValueError
        If ``output_format`` is neither 'dict' nor 'dataframe'.
    """
    if output_format not in ("dict", "dataframe"):
        raise ValueError(
            "Invalid output format selected. Only 'dict' or 'dataframe' applicable.",
        )

    # TODO: [0.15]
    if output_format == "dict":
        warnings.warn(
            "Support for `output_format` of 'dict' will be removed in 0.15 "
            "and pandas dataframes will be returned instead. To ensure your code "
            "will continue to work, use `output_format`='dataframe'.",
            category=FutureWarning,
            stacklevel=2,
        )

    return openml.utils._list_all(  # type: ignore
        list_output_format=output_format,  # type: ignore
        listing_call=_list_tasks,
        task_type=task_type,
        offset=offset,
        size=size,
        tag=tag,
        **kwargs,
    )

tasks

OpenMLClassificationTask ¶

OpenMLClusteringTask ¶

get_X(dataset_format='array') ¶

OpenMLLearningCurveTask ¶

OpenMLRegressionTask ¶

OpenMLSplit ¶

get(repeat=0, fold=0, sample=0) ¶

OpenMLSupervisedTask ¶

estimation_parameters: dict[str, str] | None property writable ¶

get_X_and_y(dataset_format='array') ¶

OpenMLTask ¶

id: int | None property ¶

download_split() ¶

get_dataset() ¶

get_split_dimensions() ¶

get_train_test_split_indices(fold=0, repeat=0, sample=0) ¶

TaskType ¶

create_task(task_type, dataset_id, estimation_procedure_id, target_name=None, evaluation_measure=None, **kwargs) ¶

delete_task(task_id) ¶

get_task(task_id, *dataset_args, download_splits=None, **get_dataset_kwargs) ¶

get_tasks(task_ids, download_data=True, download_qualities=True) ¶

list_tasks(task_type=None, offset=None, size=None, tag=None, output_format='dict', **kwargs) ¶

`OpenMLClassificationTask` ¶

`OpenMLClusteringTask` ¶

`get_X(dataset_format='array')` ¶

`OpenMLLearningCurveTask` ¶

`OpenMLRegressionTask` ¶

`OpenMLSplit` ¶

`get(repeat=0, fold=0, sample=0)` ¶

`OpenMLSupervisedTask` ¶

`estimation_parameters: dict[str, str] | None` `property` `writable` ¶

`get_X_and_y(dataset_format='array')` ¶

`OpenMLTask` ¶

`id: int | None` `property` ¶

`download_split()` ¶

`get_dataset()` ¶

`get_split_dimensions()` ¶

`get_train_test_split_indices(fold=0, repeat=0, sample=0)` ¶

`TaskType` ¶

`create_task(task_type, dataset_id, estimation_procedure_id, target_name=None, evaluation_measure=None, **kwargs)` ¶

`delete_task(task_id)` ¶

`get_task(task_id, *dataset_args, download_splits=None, **get_dataset_kwargs)` ¶

`get_tasks(task_ids, download_data=True, download_qualities=True)` ¶

`list_tasks(task_type=None, offset=None, size=None, tag=None, output_format='dict', **kwargs)` ¶