functions

`__list_runs(api_call, output_format='dict')` ¶

Helper function to parse API calls which are lists of runs

Source code in openml/runs/functions.py

def __list_runs(
    api_call: str, output_format: Literal["dict", "dataframe"] = "dict"
) -> dict | pd.DataFrame:
    """Perform a run-listing API call and parse the returned XML.

    Parameters
    ----------
    api_call : str
        The API call to perform; it is issued as a "get" request.
    output_format : {"dict", "dataframe"}, default "dict"
        - If 'dict' the output is a dict of dicts, keyed by run id.
        - If 'dataframe' the output is a pandas DataFrame indexed by run id.

    Returns
    -------
    dict or pd.DataFrame
        One entry per run with the fields ``run_id``, ``task_id``, ``setup_id``,
        ``flow_id``, ``uploader``, ``task_type``, ``upload_time`` and
        ``error_message``.

    Raises
    ------
    ValueError
        If the returned XML has no "oml:runs" element, or its "@xmlns:oml"
        attribute is missing or is not "http://openml.org/openml".
    """
    xml_string = openml._api_calls._perform_api_call(api_call, "get")
    # `force_list` asks xmltodict to always parse "oml:run" as a list,
    # including when the listing contains a single run.
    runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",))
    # Minimalistic check if the XML is useful
    if "oml:runs" not in runs_dict:
        raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}')

    if "@xmlns:oml" not in runs_dict["oml:runs"]:
        raise ValueError(
            f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {runs_dict}'
        )

    if runs_dict["oml:runs"]["@xmlns:oml"] != "http://openml.org/openml":
        raise ValueError(
            "Error in return XML, value of  "
            '"oml:runs"/@xmlns:oml is not '
            f'"http://openml.org/openml": {runs_dict}',
        )

    run_entries = runs_dict["oml:runs"]["oml:run"]
    # Report the type of the object that was actually checked, not its parent.
    assert isinstance(run_entries, list), type(run_entries)

    runs = {
        int(r["oml:run_id"]): {
            "run_id": int(r["oml:run_id"]),
            "task_id": int(r["oml:task_id"]),
            "setup_id": int(r["oml:setup_id"]),
            "flow_id": int(r["oml:flow_id"]),
            "uploader": int(r["oml:uploader"]),
            "task_type": TaskType(int(r["oml:task_type_id"])),
            "upload_time": str(r["oml:upload_time"]),
            # A falsy error message (e.g. None) is normalised to "".
            "error_message": str((r["oml:error_message"]) or ""),
        }
        for r in run_entries
    }

    if output_format == "dataframe":
        runs = pd.DataFrame.from_dict(runs, orient="index")

    return runs

`delete_run(run_id)` ¶

Delete run with id run_id from the OpenML server.

You can only delete runs which you uploaded.

Parameters:

Name	Type	Description	Default
`run_id`	`int`	OpenML id of the run	required

Returns:

Type	Description
`bool`	True if the deletion was successful. False otherwise.

Source code in openml/runs/functions.py

def delete_run(run_id: int) -> bool:
    """Remove the run identified by `run_id` from the OpenML server.

    Only runs that you uploaded yourself can be deleted.

    Parameters
    ----------
    run_id : int
        OpenML id of the run

    Returns
    -------
    bool
        True if the deletion was successful. False otherwise.
    """
    was_deleted = openml.utils._delete_entity("run", run_id)
    return was_deleted

`format_prediction(task, repeat, fold, index, prediction, truth, sample=None, proba=None)` ¶

Format the predictions in the specific order as required for the run results.

Parameters:

Name	Type	Description	Default
`task`	`OpenMLSupervisedTask`	Task for which to format the predictions.	required
`repeat`	`int`	From which repeat this prediction is made.	required
`fold`	`int`	From which fold this prediction is made.	required
`index`	`int`	For which index this prediction is made.	required
`prediction`	`str \| int \| float`	The predicted class label or value.	required
`truth`	`str \| int \| float`	The true class label or value.	required
`sample`	`int \| None`	From which sample set this prediction is made. Required only for LearningCurve tasks.	`None`
`proba`	`dict[str, float] \| None`	For classification tasks only. A mapping from each class label to their predicted probability. The dictionary should contain an entry for each of the `task.class_labels`. E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1}	`None`

Returns:

Type	Description
`list[str \| int \| float]`	A list with elements for the prediction results of a run. The returned order of the elements is (if available): [repeat, fold, sample, index, prediction, truth, *probabilities]. This order follows the R Client API.

Source code in openml/runs/functions.py

def format_prediction(  # noqa: PLR0913
    task: OpenMLSupervisedTask,
    repeat: int,
    fold: int,
    index: int,
    prediction: str | int | float,
    truth: str | int | float,
    sample: int | None = None,
    proba: dict[str, float] | None = None,
) -> list[str | int | float]:
    """Arrange a single prediction in the element order used for run results.

    Parameters
    ----------
    task: OpenMLSupervisedTask
        Task the prediction belongs to.
    repeat: int
        Repeat in which the prediction was made.
    fold: int
        Fold in which the prediction was made.
    index: int
        Index for which the prediction was made.
    prediction: str, int or float
        The predicted class label or value.
    truth: str, int or float
        The true class label or value.
    sample: int, optional (default=None)
        Sample set in which the prediction was made.
        Required only for LearningCurve tasks; other classification tasks
        fall back to 0 when it is omitted.
    proba: Dict[str, float], optional (default=None)
        For classification tasks only.
        Maps every class label to its predicted probability and must have
        exactly one entry for each of the `task.class_labels`.
        E.g.: {"Iris-Setosa": 0.2, "Iris-Versicolor": 0.7, "Iris-Virginica": 0.1}

    Returns
    -------
    A list with elements for the prediction results of a run.

    Classification tasks yield
        [repeat, fold, sample, index, prediction, truth, *probabilities]
    and regression tasks yield
        [repeat, fold, index, prediction, truth]

    This order follows the R Client API.

    Raises
    ------
    ValueError
        For classification tasks when `proba` is missing, the task has no
        class labels, the labels in `proba` differ from the task's labels, or
        `sample` is missing for a learning curve task.
    NotImplementedError
        For any task that is neither a classification nor a regression task.
    """
    if not isinstance(task, OpenMLClassificationTask):
        if isinstance(task, OpenMLRegressionTask):
            return [repeat, fold, index, prediction, truth]
        raise NotImplementedError(f"Formatting for {type(task)} is not supported.")

    # From here on the task is a classification task.
    if proba is None:
        raise ValueError("`proba` is required for classification task")

    labels = task.class_labels
    if labels is None:
        raise ValueError("The classification task must have class labels set")
    if set(labels) != set(proba):
        raise ValueError("Each class should have a predicted probability")

    if sample is None:
        if isinstance(task, OpenMLLearningCurveTask):
            raise ValueError("`sample` can not be none for LearningCurveTask")
        sample = 0

    # Probabilities are emitted in the order of the task's class labels.
    ordered_probabilities = [proba[label] for label in labels]
    return [repeat, fold, sample, index, prediction, truth, *ordered_probabilities]

`get_run(run_id, ignore_cache=False)` ¶

Gets run corresponding to run_id.

Parameters:

Name	Type	Description	Default
`run_id`	`int`		required
`ignore_cache`	`bool`	Whether to ignore the cache. If `true` this will download and overwrite the run xml even if the requested run is already cached.	`False`

Returns:

Name	Type	Description
`run`	`OpenMLRun`	Run corresponding to ID, fetched from the server.

Source code in openml/runs/functions.py

@openml.utils.thread_safe_if_oslo_installed
def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT002, FBT001
    """Gets run corresponding to run_id.

    The cached run is returned when available, unless `ignore_cache` is set.
    Otherwise the run description is downloaded, written to
    ``description.xml`` in the run's cache directory, and parsed.

    Parameters
    ----------
    run_id : int
        OpenML id of the run.

    ignore_cache : bool
        Whether to ignore the cache. If ``true`` this will download and overwrite the run xml
        even if the requested run is already cached.

    Returns
    -------
    run : OpenMLRun
        Run corresponding to ID, either loaded from the cache or fetched from the server.
    """
    run_dir = Path(openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id))
    run_file = run_dir / "description.xml"

    # Ensure the cache directory exists before anything is written to it.
    run_dir.mkdir(parents=True, exist_ok=True)

    if not ignore_cache:
        try:
            return _get_cached_run(run_id)
        except OpenMLCacheException:
            # Cache miss: fall through and download the run from the server.
            pass

    run_xml = openml._api_calls._perform_api_call("run/%d" % run_id, "get")
    with run_file.open("w", encoding="utf8") as fh:
        fh.write(run_xml)

    return _create_run_from_xml(run_xml)

`get_run_trace(run_id)` ¶

Get the optimization trace object for a given run id.

Parameters:

Name	Type	Description	Default
`run_id`	`int`		required

Returns:

Type	Description
`OpenMLRunTrace`	The optimization trace of the run.

Source code in openml/runs/functions.py

def get_run_trace(run_id: int) -> OpenMLRunTrace:
    """
    Download and parse the optimization trace of a run.

    Parameters
    ----------
    run_id : int
        OpenML id of the run whose trace is requested.

    Returns
    -------
    openml.runs.OpenMLRunTrace
        Trace object built from the XML returned by the server.
    """
    endpoint = "run/trace/%d" % run_id
    trace_xml = openml._api_calls._perform_api_call(endpoint, "get")
    return OpenMLRunTrace.trace_from_xml(trace_xml)

`get_runs(run_ids)` ¶

Gets all runs in run_ids list.

Parameters:

Name	Type	Description	Default
`run_ids`	`list of ints`		required

Returns:

Name	Type	Description
`runs`	`list of OpenMLRun`	List of runs corresponding to IDs, fetched from the server.

Source code in openml/runs/functions.py

def get_runs(run_ids: list[int]) -> list[OpenMLRun]:
    """Fetch every run listed in `run_ids`.

    Each id is resolved through `get_run`, in the order given.

    Parameters
    ----------
    run_ids : list of ints
        OpenML ids of the runs to fetch.

    Returns
    -------
    runs : list of OpenMLRun
        Runs corresponding to the IDs, in the same order as `run_ids`.
    """
    return [get_run(run_id) for run_id in run_ids]

`initialize_model_from_run(run_id)` ¶

Initialize a model based on a run_id (i.e., using the exact same parameter settings)

Parameters:

Name	Type	Description	Default
`run_id`	`int`	The Openml run_id	required

Returns:

Type	Description
`model`

Source code in openml/runs/functions.py

def initialize_model_from_run(run_id: int) -> Any:
    """
    Build a model from the setup of an existing run (i.e., using the exact
    same parameter settings).

    Parameters
    ----------
    run_id : int
        The Openml run_id

    Returns
    -------
    model
        The object returned by `initialize_model` for the run's setup id.
    """
    setup_id = get_run(run_id).setup_id
    # TODO(eddiebergman): I imagine this is None if it's not published,
    # might need to raise an explicit error for that
    assert setup_id is not None
    return initialize_model(setup_id)

`initialize_model_from_trace(run_id, repeat, fold, iteration=None)` ¶

Initialize a model based on the parameters that were set by an optimization procedure (i.e., using the exact same parameter settings)

Parameters:

Name	Type	Description	Default
`run_id`	`int`	The Openml run_id. Should contain a trace file, otherwise a OpenMLServerException is raised	required
`repeat`	`int`	The repeat nr (column in trace file)	required
`fold`	`int`	The fold nr (column in trace file)	required
`iteration`	`int`	The iteration nr (column in trace file). If None, the best (selected) iteration will be searched (slow), according to the selection criteria implemented in OpenMLRunTrace.get_selected_iteration	`None`

Returns:

Type	Description
`model`

Source code in openml/runs/functions.py

def initialize_model_from_trace(
    run_id: int,
    repeat: int,
    fold: int,
    iteration: int | None = None,
) -> Any:
    """
    Build a model from the parameters recorded in a run's optimization
    trace (i.e., using the exact same parameter settings).

    Parameters
    ----------
    run_id : int
        The Openml run_id. Should contain a trace file,
        otherwise a OpenMLServerException is raised

    repeat : int
        The repeat nr (column in trace file)

    fold : int
        The fold nr (column in trace file)

    iteration : int, optional
        The iteration nr (column in trace file). If None, the
        best (selected) iteration will be searched (slow),
        according to the selection criteria implemented in
        OpenMLRunTrace.get_selected_iteration

    Returns
    -------
    model

    Raises
    ------
    ValueError
        If the trace has no entry for the (repeat, fold, iteration) combination.
    """
    flow_id = get_run(run_id).flow_id
    # TODO(eddiebergman): I imagine this is None if it's not published,
    # might need to raise an explicit error for that
    assert flow_id is not None

    flow = get_flow(flow_id)
    trace = get_run_trace(run_id)

    selected = trace.get_selected_iteration(repeat, fold) if iteration is None else iteration

    trace_key = (repeat, fold, selected)
    if trace_key not in trace.trace_iterations:
        raise ValueError("Combination repeat, fold, iteration not available")
    trace_entry = trace.trace_iterations[trace_key]

    # The search model comes from the run's setup; the trace entry is then
    # handed to the flow's extension together with it.
    base_model = initialize_model_from_run(run_id)
    return flow.extension.instantiate_model_from_hpo_class(base_model, trace_entry)

`list_runs(offset=None, size=None, id=None, task=None, setup=None, flow=None, uploader=None, tag=None, study=None, display_errors=False, output_format='dict', **kwargs)` ¶

List all runs matching all of the given filters. (Supports large amount of results)

Parameters:

Name	Type	Description	Default
`offset`	`int`	the number of runs to skip, starting from the first	`None`
`size`	`int`	the maximum number of runs to show	`None`
`id`	`list`		`None`
`task`	`list`		`None`
`setup`	`list \| None`		`None`
`flow`	`list`		`None`
`uploader`	`list`		`None`
`tag`	`str`		`None`
`study`	`int`		`None`
`display_errors`	`(bool, optional(default=False))`	Whether to list runs which have an error (for example a missing prediction file).	`False`
`output_format`	`Literal['dict', 'dataframe']`	The parameter decides the format of the output. - If 'dict' the output is a dict of dict - If 'dataframe' the output is a pandas DataFrame	`'dict'`
`kwargs`	`dict`	Legal filter operators: task_type.	`{}`

Returns:

Type	Description
`dict of dicts, or dataframe`

Source code in openml/runs/functions.py

def list_runs(  # noqa: PLR0913
    offset: int | None = None,
    size: int | None = None,
    id: list | None = None,  # noqa: A002
    task: list[int] | None = None,
    setup: list | None = None,
    flow: list | None = None,
    uploader: list | None = None,
    tag: str | None = None,
    study: int | None = None,
    display_errors: bool = False,  # noqa: FBT001, FBT002
    output_format: Literal["dict", "dataframe"] = "dict",
    **kwargs: Any,
) -> dict | pd.DataFrame:
    """
    List all runs matching all of the given filters.
    (Supports large amount of results)

    The arguments are validated here and then forwarded to
    ``openml.utils._list_all``.

    Parameters
    ----------
    offset : int, optional
        the number of runs to skip, starting from the first
    size : int, optional
        the maximum number of runs to show

    id : list, optional
        Must be a list when given.

    task : list, optional
        Must be a list when given.

    setup: list, optional
        Must be a list when given.

    flow : list, optional
        Must be a list when given.

    uploader : list, optional
        Must be a list when given.

    tag : str, optional

    study : int, optional

    display_errors : bool, optional (default=False)
        Whether to list runs which have an error (for example a missing
        prediction file).

    output_format: str, optional (default='dict')
        The parameter decides the format of the output.
        - If 'dict' the output is a dict of dict
        - If 'dataframe' the output is a pandas DataFrame
        Passing 'dict' emits a FutureWarning.

    kwargs : dict, optional
        Legal filter operators: task_type.

    Returns
    -------
    dict of dicts, or dataframe

    Raises
    ------
    ValueError
        If `output_format` is neither 'dict' nor 'dataframe'.
    TypeError
        If `id`, `task`, `setup`, `flow` or `uploader` is given but is not a list.
    """
    if output_format not in ("dataframe", "dict"):
        raise ValueError("Invalid output format selected. Only 'dict' or 'dataframe' applicable.")

    # TODO: [0.15]
    if output_format == "dict":
        warnings.warn(
            "Support for `output_format` of 'dict' will be removed in 0.15 "
            "and pandas dataframes will be returned instead. To ensure your code "
            "will continue to work, use `output_format`='dataframe'.",
            category=FutureWarning,
            stacklevel=2,
        )

    # TODO(eddiebergman): Do we really need this runtime type validation?
    list_filters = {
        "id": id,
        "task": task,
        "setup": setup,
        "flow": flow,
        "uploader": uploader,
    }
    for filter_name, filter_value in list_filters.items():
        if not (filter_value is None or isinstance(filter_value, list)):
            raise TypeError(f"{filter_name} must be of type list.")

    return openml.utils._list_all(  # type: ignore
        list_output_format=output_format,  # type: ignore
        listing_call=_list_runs,
        offset=offset,
        size=size,
        id=id,
        task=task,
        setup=setup,
        flow=flow,
        uploader=uploader,
        tag=tag,
        study=study,
        display_errors=display_errors,
        **kwargs,
    )

`run_exists(task_id, setup_id)` ¶

Checks whether a task/setup combination is already present on the server.

Parameters:

Name	Type	Description	Default
`task_id`	`int`		required
`setup_id`	`int`		required

Returns:

Type	Description
`set[int]`	Set of run ids for runs where flow setup_id was run on task_id. Empty set if it wasn't run yet.

Source code in openml/runs/functions.py

def run_exists(task_id: int, setup_id: int) -> set[int]:
    """Look up the runs of a task/setup combination that are already on the
    server.

    Parameters
    ----------
    task_id : int
        OpenML id of the task.

    setup_id : int
        OpenML id of the setup. Non-positive values short-circuit to an
        empty result without contacting the server.

    Returns
    -------
    set of int
        Run ids for runs where flow setup_id was run on task_id. Empty
        set if it wasn't run yet.
    """
    # openml setups are in range 1-inf
    if setup_id <= 0:
        return set()

    try:
        matches = list_runs(task=[task_id], setup=[setup_id], output_format="dataframe")
        assert isinstance(matches, pd.DataFrame)  # TODO(eddiebergman): Remove once #1299
        if matches.empty:
            return set()
        return set(matches["run_id"])
    except OpenMLServerException as err:
        # error code implies no results. The run does not exist yet
        if err.code == ERROR_CODE:
            return set()
        raise err

`run_flow_on_task(flow, task, avoid_duplicate_runs=True, flow_tags=None, seed=None, add_local_measures=True, upload_flow=False, dataset_format='dataframe', n_jobs=None)` ¶

Run the model provided by the flow on the dataset defined by task.

Takes the flow and repeat information into account. The Flow may optionally be published.

Parameters:

Name	Type	Description	Default
`flow`	`OpenMLFlow`	A flow wraps a machine learning model together with relevant information. The model has a function fit(X,Y) and predict(X), all supervised estimators of scikit learn follow this definition of a model (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)	required
`task`	`OpenMLTask`	Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.	required
`avoid_duplicate_runs`	`(bool, optional(default=True))`	If True, the run will throw an error if the setup/task combination is already present on the server. This feature requires an internet connection.	`True`
`flow_tags`	`(List[str], optional(default=None))`	A list of tags that the flow should have at creation.	`None`
`seed`	`int \| None`	Models that are not seeded will get this seed.	`None`
`add_local_measures`	`(bool, optional(default=True))`	Determines whether to calculate a set of evaluation measures locally, to later verify server behaviour.	`True`
`upload_flow`	`bool(default=False)`	If True, upload the flow to OpenML if it does not exist yet. If False, do not upload the flow to OpenML.	`False`
`dataset_format`	`str(default='dataframe')`	If 'array', the dataset is passed to the model as a numpy array. If 'dataframe', the dataset is passed to the model as a pandas dataframe.	`'dataframe'`
`n_jobs`	`int(default=None)`	The number of processes/threads to distribute the evaluation asynchronously. If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially. If `-1`, then the job uses as many cores available.	`None`

Returns:

Name	Type	Description
`run`	`OpenMLRun`	Result of the run.

Source code in openml/runs/functions.py

def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
    flow: OpenMLFlow,
    task: OpenMLTask,
    avoid_duplicate_runs: bool = True,  # noqa: FBT002, FBT001
    flow_tags: list[str] | None = None,
    seed: int | None = None,
    add_local_measures: bool = True,  # noqa: FBT001, FBT002
    upload_flow: bool = False,  # noqa: FBT001, FBT002
    dataset_format: Literal["array", "dataframe"] = "dataframe",
    n_jobs: int | None = None,
) -> OpenMLRun:
    """Run the model provided by the flow on the dataset defined by task.

    Takes the flow and repeat information into account.
    The Flow may optionally be published.

    Parameters
    ----------
    flow : OpenMLFlow
        A flow wraps a machine learning model together with relevant information.
        The model has a function fit(X,Y) and predict(X),
        all supervised estimators of scikit learn follow this definition of a model
        (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)
    task : OpenMLTask
        Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.
    avoid_duplicate_runs : bool, optional (default=True)
        If True, the run will throw an error if the setup/task combination is already present on
        the server. This feature requires an internet connection.
    flow_tags : List[str], optional (default=None)
        A list of tags that the flow should have at creation.
        NOTE(review): within this function the value is only type-checked;
        it is not otherwise used here.
    seed: int, optional (default=None)
        Models that are not seeded will get this seed.
    add_local_measures : bool, optional (default=True)
        Determines whether to calculate a set of evaluation measures locally,
        to later verify server behaviour.
    upload_flow : bool (default=False)
        If True, upload the flow to OpenML if it does not exist yet.
        If False, do not upload the flow to OpenML.
    dataset_format : str (default='dataframe')
        If 'array', the dataset is passed to the model as a numpy array.
        If 'dataframe', the dataset is passed to the model as a pandas dataframe.
    n_jobs : int (default=None)
        The number of processes/threads to distribute the evaluation asynchronously.
        If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially.
        If `-1`, then the job uses as many cores available.

    Returns
    -------
    run : OpenMLRun
        Result of the run.

    Raises
    ------
    ValueError
        If `flow_tags` is given but is not a list, or if the task has no `task_id`.
    PyOpenMLError
        If the local `flow.flow_id` disagrees with the server's answer for this flow.
    OpenMLRunsExistError
        If `avoid_duplicate_runs` is True and runs of this setup already exist for the task.
    """
    if flow_tags is not None and not isinstance(flow_tags, list):
        raise ValueError("flow_tags should be a list")

    # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
    # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
    if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):
        # We want to allow either order of argument (to avoid confusion).
        # The message names this function's own parameters: the deprecated
        # call passes the task first and the flow second.
        warnings.warn(
            "The old argument order (task, flow) is deprecated and "
            "will not be supported in the future. Please use the "
            "order (flow, task).",
            DeprecationWarning,
            stacklevel=2,
        )
        task, flow = flow, task

    if task.task_id is None:
        raise ValueError("The task should be published at OpenML")

    if flow.model is None:
        flow.model = flow.extension.flow_to_model(flow)

    flow.model = flow.extension.seed_model(flow.model, seed=seed)

    # We only need to sync with the server right now if we want to upload the flow,
    # or ensure no duplicate runs exist. Otherwise it can be synced at upload time.
    flow_id = None
    if upload_flow or avoid_duplicate_runs:
        flow_id = flow_exists(flow.name, flow.external_version)
        if isinstance(flow.flow_id, int) and flow_id != flow.flow_id:
            # The flow claims a local id that the server does not confirm.
            if flow_id is not False:
                raise PyOpenMLError(
                    "Local flow_id does not match server flow_id: "
                    f"'{flow.flow_id}' vs '{flow_id}'",
                )
            raise PyOpenMLError(
                "Flow does not exist on the server, but 'flow.flow_id' is not None."
            )
        if upload_flow and flow_id is False:
            flow.publish()
            flow_id = flow.flow_id
        elif flow_id:
            flow_from_server = get_flow(flow_id)
            _copy_server_fields(flow_from_server, flow)
            if avoid_duplicate_runs:
                flow_from_server.model = flow.model
                setup_id = setup_exists(flow_from_server)
                ids = run_exists(task.task_id, setup_id)
                if ids:
                    error_message = (
                        "One or more runs of this setup were already performed on the task."
                    )
                    raise OpenMLRunsExistError(ids, error_message)
        else:
            # Flow does not exist on server and we do not want to upload it.
            # No sync with the server happens.
            flow_id = None

    dataset = task.get_dataset()

    run_environment = flow.extension.get_version_information()
    tags = ["openml-python", run_environment[1]]

    if flow.extension.check_if_model_fitted(flow.model):
        warnings.warn(
            "The model is already fitted!"
            " This might cause inconsistency in comparison of results.",
            RuntimeWarning,
            stacklevel=2,
        )

    # execute the run
    res = _run_task_get_arffcontent(
        model=flow.model,
        task=task,
        extension=flow.extension,
        add_local_measures=add_local_measures,
        dataset_format=dataset_format,
        n_jobs=n_jobs,
    )

    data_content, trace, fold_evaluations, sample_evaluations = res
    # The generated description lists the version information, the current
    # local time and a marker naming this function, one item per line.
    fields = [*run_environment, time.strftime("%c"), "Created by run_flow_on_task"]
    generated_description = "\n".join(fields)
    run = OpenMLRun(
        task_id=task.task_id,
        flow_id=flow_id,
        dataset_id=dataset.dataset_id,
        model=flow.model,
        flow_name=flow.name,
        tags=tags,
        trace=trace,
        data_content=data_content,
        flow=flow,
        setup_string=flow.extension.create_setup_string(flow.model),
        description_text=generated_description,
    )

    if (upload_flow or avoid_duplicate_runs) and flow.flow_id is not None:
        # We only extract the parameter settings if a sync happened with the server.
        # I.e. when the flow was uploaded or we found it in the avoid_duplicate check.
        # Otherwise, we will do this at upload time.
        run.parameter_settings = flow.extension.obtain_parameter_values(flow)

    # now we need to attach the detailed evaluations
    if task.task_type_id == TaskType.LEARNING_CURVE:
        run.sample_evaluations = sample_evaluations
    else:
        run.fold_evaluations = fold_evaluations

    if flow_id:
        message = f"Executed Task {task.task_id} with Flow id:{run.flow_id}"
    else:
        message = f"Executed Task {task.task_id} on local Flow with name {flow.name}."
    config.logger.info(message)

    return run

`run_model_on_task(model, task, avoid_duplicate_runs=True, flow_tags=None, seed=None, add_local_measures=True, upload_flow=False, return_flow=False, dataset_format='dataframe', n_jobs=None)` ¶

Run the model on the dataset defined by the task.

Parameters:

Name	Type	Description	Default
`model`	`sklearn model`	A model which has a function fit(X,Y) and predict(X), all supervised estimators of scikit learn follow this definition of a model (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)	required
`task`	`OpenMLTask or int or str`	Task to perform or Task id. This may be a model instead if the first argument is an OpenMLTask.	required
`avoid_duplicate_runs`	`(bool, optional(default=True))`	If True, the run will throw an error if the setup/task combination is already present on the server. This feature requires an internet connection.	`True`
`flow_tags`	`(List[str], optional(default=None))`	A list of tags that the flow should have at creation.	`None`
`seed`	`int \| None`	Models that are not seeded will get this seed.	`None`
`add_local_measures`	`(bool, optional(default=True))`	Determines whether to calculate a set of evaluation measures locally, to later verify server behaviour.	`True`
`upload_flow`	`bool(default=False)`	If True, upload the flow to OpenML if it does not exist yet. If False, do not upload the flow to OpenML.	`False`
`return_flow`	`bool(default=False)`	If True, returns the OpenMLFlow generated from the model in addition to the OpenMLRun.	`False`
`dataset_format`	`str(default='dataframe')`	If 'array', the dataset is passed to the model as a numpy array. If 'dataframe', the dataset is passed to the model as a pandas dataframe.	`'dataframe'`
`n_jobs`	`int(default=None)`	The number of processes/threads to distribute the evaluation asynchronously. If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially. If `-1`, then the job uses as many cores available.	`None`

Returns:

Name	Type	Description
`run`	`OpenMLRun`	Result of the run.
`flow`	OpenMLFlow (optional, only if `return_flow` is True).	Flow generated from the model.

Source code in openml/runs/functions.py

def run_model_on_task(  # noqa: PLR0913
    model: Any,
    task: int | str | OpenMLTask,
    avoid_duplicate_runs: bool = True,  # noqa: FBT001, FBT002
    flow_tags: list[str] | None = None,
    seed: int | None = None,
    add_local_measures: bool = True,  # noqa: FBT001, FBT002
    upload_flow: bool = False,  # noqa: FBT001, FBT002
    return_flow: bool = False,  # noqa: FBT001, FBT002
    dataset_format: Literal["array", "dataframe"] = "dataframe",
    n_jobs: int | None = None,
) -> OpenMLRun | tuple[OpenMLRun, OpenMLFlow]:
    """Run the model on the dataset defined by the task.

    The model is converted to a flow by the extension registered for it, the
    task is resolved to an ``OpenMLTask`` if an id was given, and the actual
    work is delegated to ``run_flow_on_task``.

    Parameters
    ----------
    model : sklearn model
        A model which has a function fit(X,Y) and predict(X),
        all supervised estimators of scikit learn follow this definition of a model
        (https://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)
    task : OpenMLTask or int or str
        Task to perform or Task id. A ``str`` id is converted with ``int()``.
        For backward compatibility the deprecated argument order (task, model) is
        still accepted: if ``model`` is an int, str or OpenMLTask, the two
        arguments are swapped and a ``DeprecationWarning`` is emitted.
    avoid_duplicate_runs : bool, optional (default=True)
        If True, the run will throw an error if the setup/task combination is already present on
        the server. This feature requires an internet connection.
    flow_tags : List[str], optional (default=None)
        A list of tags that the flow should have at creation.
    seed: int, optional (default=None)
        Models that are not seeded will get this seed.
    add_local_measures : bool, optional (default=True)
        Determines whether to calculate a set of evaluation measures locally,
        to later verify server behaviour.
    upload_flow : bool (default=False)
        If True, upload the flow to OpenML if it does not exist yet.
        If False, do not upload the flow to OpenML.
    return_flow : bool (default=False)
        If True, returns the OpenMLFlow generated from the model in addition to the OpenMLRun.
    dataset_format : str (default='dataframe')
        If 'array', the dataset is passed to the model as a numpy array.
        If 'dataframe', the dataset is passed to the model as a pandas dataframe.
    n_jobs : int (default=None)
        The number of processes/threads to distribute the evaluation asynchronously.
        If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially.
        If `-1`, then the job uses as many cores as are available.

    Returns
    -------
    run : OpenMLRun
        Result of the run.
    flow : OpenMLFlow (optional, only if `return_flow` is True).
        Flow generated from the model.

    Raises
    ------
    TypeError
        If no extension could be resolved for ``model``.

    Warns
    -----
    RuntimeWarning
        If ``avoid_duplicate_runs`` is True but no API key is configured.
    DeprecationWarning
        If the arguments are passed in the old (task, model) order.
    """
    if avoid_duplicate_runs and not config.apikey:
        # NOTE: the trailing space after "see" is required; adjacent string
        # literals are concatenated verbatim and the URL would otherwise be
        # glued to the preceding word.
        warnings.warn(
            "avoid_duplicate_runs is set to True, but no API key is set. "
            "Please set your API key in the OpenML configuration file, see "
            "https://openml.github.io/openml-python/main/examples/20_basic/introduction_tutorial"
            ".html#authentication for more information on authentication.",
            RuntimeWarning,
            stacklevel=2,
        )

    # TODO: At some point in the future do not allow for arguments in old order (6-2018).
    # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
    # When removing this please also remove the method `is_estimator` from the extension
    # interface as it is only used here (MF, 3-2019)
    if isinstance(model, (int, str, OpenMLTask)):
        warnings.warn(
            "The old argument order (task, model) is deprecated and "
            "will not be supported in the future. Please use the "
            "order (model, task).",
            DeprecationWarning,
            stacklevel=2,
        )
        task, model = model, task

    extension = get_extension_by_model(model, raise_if_no_extension=True)
    if extension is None:
        # This should never happen; the check is only here to please mypy and
        # will be gone once the whole function is removed.
        raise TypeError(extension)

    flow = extension.model_to_flow(model)

    def get_task_and_type_conversion(_task: int | str | OpenMLTask) -> OpenMLTask:
        """Retrieve an OpenMLTask object from either an integer or string ID,
        or directly from an OpenMLTask object.

        Parameters
        ----------
        _task : Union[int, str, OpenMLTask]
            The task ID or the OpenMLTask object.

        Returns
        -------
        OpenMLTask
            The OpenMLTask object.
        """
        if isinstance(_task, (int, str)):
            return get_task(int(_task))  # type: ignore

        return _task

    task = get_task_and_type_conversion(task)

    run = run_flow_on_task(
        task=task,
        flow=flow,
        avoid_duplicate_runs=avoid_duplicate_runs,
        flow_tags=flow_tags,
        seed=seed,
        add_local_measures=add_local_measures,
        upload_flow=upload_flow,
        dataset_format=dataset_format,
        n_jobs=n_jobs,
    )
    if return_flow:
        return run, flow
    return run

functions

__list_runs(api_call, output_format='dict') ¶

delete_run(run_id) ¶

format_prediction(task, repeat, fold, index, prediction, truth, sample=None, proba=None) ¶

get_run(run_id, ignore_cache=False) ¶

get_run_trace(run_id) ¶

get_runs(run_ids) ¶

initialize_model_from_run(run_id) ¶

initialize_model_from_trace(run_id, repeat, fold, iteration=None) ¶

list_runs(offset=None, size=None, id=None, task=None, setup=None, flow=None, uploader=None, tag=None, study=None, display_errors=False, output_format='dict', **kwargs) ¶

run_exists(task_id, setup_id) ¶

run_flow_on_task(flow, task, avoid_duplicate_runs=True, flow_tags=None, seed=None, add_local_measures=True, upload_flow=False, dataset_format='dataframe', n_jobs=None) ¶

run_model_on_task(model, task, avoid_duplicate_runs=True, flow_tags=None, seed=None, add_local_measures=True, upload_flow=False, return_flow=False, dataset_format='dataframe', n_jobs=None) ¶

`__list_runs(api_call, output_format='dict')` ¶

`delete_run(run_id)` ¶

`format_prediction(task, repeat, fold, index, prediction, truth, sample=None, proba=None)` ¶

`get_run(run_id, ignore_cache=False)` ¶

`get_run_trace(run_id)` ¶

`get_runs(run_ids)` ¶

`initialize_model_from_run(run_id)` ¶

`initialize_model_from_trace(run_id, repeat, fold, iteration=None)` ¶

`list_runs(offset=None, size=None, id=None, task=None, setup=None, flow=None, uploader=None, tag=None, study=None, display_errors=False, output_format='dict', **kwargs)` ¶

`run_exists(task_id, setup_id)` ¶

`run_flow_on_task(flow, task, avoid_duplicate_runs=True, flow_tags=None, seed=None, add_local_measures=True, upload_flow=False, dataset_format='dataframe', n_jobs=None)` ¶

`run_model_on_task(model, task, avoid_duplicate_runs=True, flow_tags=None, seed=None, add_local_measures=True, upload_flow=False, return_flow=False, dataset_format='dataframe', n_jobs=None)` ¶