Source code for oumi.builders.rewards
# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Callable, Mapping
from functools import wraps
from typing import Any
from oumi.core.configs import TrainingParams
from oumi.core.registry import REGISTRY
def _apply_reward_function_kwargs(
reward_func: Callable, reward_function_kwargs: dict[str, Any] | None
) -> Callable:
if not reward_function_kwargs:
return reward_func
@wraps(reward_func)
def _wrapped(*args: Any, **kwargs: Any) -> Any:
# Configured kwargs take precedence over per-sample or call-time kwargs.
merged_kwargs = {**kwargs, **reward_function_kwargs}
return reward_func(*args, **merged_kwargs)
return _wrapped
[docs]
def build_reward_functions(config: TrainingParams) -> list[Callable]:
"""Builds the reward functions.
Example config (YAML)::
reward_functions:
- rubric_reward
- gsm8k
reward_function_kwargs:
rubric_reward:
judge_panel_path: "configs/.../judge_panel.yaml"
gsm8k:
strict: true
"""
if not config.reward_functions:
return []
# Import to ensure GRPO reward functions are added to REGISTRY.
import oumi.datasets.grpo.rewards as grpo_rewards # noqa: F401
function_names = [name for name in config.reward_functions if name]
kwargs_map = config.reward_function_kwargs or {}
if kwargs_map:
if not isinstance(kwargs_map, Mapping):
raise ValueError(
"reward_function_kwargs must be a dict keyed by reward "
"function name, e.g. {rubric_reward: {judge_panel_path: ...}}."
)
unexpected_keys = set(kwargs_map.keys()) - set(function_names)
if unexpected_keys:
raise ValueError(
"reward_function_kwargs must be a dict keyed by reward function "
"name. Unexpected keys not listed in reward_functions: "
f"{sorted(unexpected_keys)}."
)
for key, value in kwargs_map.items():
if not isinstance(value, Mapping):
raise ValueError(
"reward_function_kwargs entries must be dicts. "
f"Entry '{key}' is {type(value).__name__}. Use "
"reward_function_kwargs: {<func_name>: {arg: value}}."
)
result: list[Callable] = []
for name in function_names:
reward_function = REGISTRY.get_reward_function(name)
if not reward_function:
raise KeyError(f"reward_function `{name}` was not found in the registry.")
result.append(
_apply_reward_function_kwargs(reward_function, kwargs_map.get(name))
)
return result