Source code for oumi.cli.infer
# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import Annotated, Optional

import typer

import oumi.cli.cli_utils as cli_utils
from oumi.utils.logging import logger


def infer(
    ctx: typer.Context,
    config: Annotated[
        Optional[str],
        typer.Option(
            *cli_utils.CONFIG_FLAGS,
            help="Path to the configuration file for inference.",
        ),
    ] = None,
    interactive: Annotated[
        bool,
        typer.Option("-i", "--interactive", help="Run in an interactive session."),
    ] = False,
    image: Annotated[
        Optional[str],
        typer.Option(
            "--image",
            help=(
                "File path or URL of an input image to be used with image+text VLLMs. "
                "Only used in interactive mode."
            ),
        ),
    ] = None,
    level: cli_utils.LOG_LEVEL_TYPE = None,
):
"""Run inference on a model.
If `input_filepath` is provided in the configuration file, inference will run on
those input examples. Otherwise, inference will run interactively with user-provided
inputs.
Args:
ctx: The Typer context object.
config: Path to the configuration file for inference.
interactive: Whether to run in an interactive session.
image: Path to the input image for `image+text` VLLMs.
level: The logging level for the specified command.
"""
    extra_args = cli_utils.parse_extra_cli_args(ctx)

    # Delayed imports: keep heavy dependencies out of CLI startup.
    from oumi import infer as oumi_infer
    from oumi import infer_interactive as oumi_infer_interactive
    from oumi.core.configs import InferenceConfig
    from oumi.utils.image_utils import (
        load_image_png_bytes_from_path,
        load_image_png_bytes_from_url,
    )
    # End imports

    # Merge the YAML config with the CLI overrides, then validate the result.
    parsed_config: InferenceConfig = InferenceConfig.from_yaml_and_arg_list(
        config, extra_args, logger=logger
    )
    parsed_config.finalize_and_validate()

    # Silence the HF tokenizers fork-parallelism warning:
    # https://stackoverflow.com/questions/62691279/how-to-disable-tokenizers-parallelism-true-false-warning
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
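
    # If an input image was given, normalize it to PNG bytes so it can be
    # passed to the interactive inference loop below.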
    input_image_png_bytes: Optional[bytes] = None
    if image:
        image_lower = image.lower()
        if image_lower.startswith("http://") or image_lower.startswith("https://"):
            input_image_png_bytes = load_image_png_bytes_from_url(image)
        else:
            input_image_png_bytes = load_image_png_bytes_from_path(image)

    if interactive:
        if parsed_config.input_path:
            logger.warning(
                "Interactive inference requested, skipping reading from "
                "`input_path`."
            )
        return oumi_infer_interactive(
            parsed_config, input_image_bytes=input_image_png_bytes
        )

    if parsed_config.input_path is None:
        raise ValueError("One of `--interactive` or `input_path` must be provided.")
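
    # Batch mode: run inference over all examples read from `input_path`.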
    generations = oumi_infer(parsed_config)

    # Don't print results if `output_path` is provided; they are written to
    # that file instead.
    if parsed_config.output_path:
        return

    for generation in generations:
        print("------------")
        print(repr(generation))
        print("------------")