Source code for oumi.core.configs.evaluation_config
# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from oumi.core.configs.base_config import BaseConfig
from oumi.core.configs.inference_engine_type import InferenceEngineType
from oumi.core.configs.params.evaluation_params import EvaluationTaskParams
from oumi.core.configs.params.generation_params import GenerationParams
from oumi.core.configs.params.model_params import ModelParams
from oumi.core.configs.params.remote_params import RemoteParams
from oumi.utils.str_utils import sanitize_run_name
@dataclass
class EvaluationConfig(BaseConfig):
    tasks: list[EvaluationTaskParams] = field(default_factory=list)
    """List of all the evaluation tasks to run."""

    model: ModelParams = field(default_factory=ModelParams)
    """Parameters for the model to be evaluated.

    This includes model architecture, size, dtype, and any specific configurations
    required for the evaluation task.
    """

    generation: GenerationParams = field(default_factory=GenerationParams)
    """Parameters for text generation during evaluation.

    This includes settings such as temperature, top-k, top-p, maximum length,
    and any other parameters that control the text generation process.
    """

    inference_engine: InferenceEngineType = InferenceEngineType.NATIVE
    """For evaluation tasks that require an inference step, such as AlpacaEval tasks,
    an inference engine is required to generate model responses. This parameter
    specifies the inference engine to use for generation. If not defined, the
    default is the `NATIVE` inference engine."""

    inference_remote_params: Optional[RemoteParams] = None
    """For evaluation tasks that require an inference step, such as AlpacaEval tasks,
    an inference engine is required to generate model responses. If the model is
    accessed via a remote API, these parameters specify how to run inference
    against the remote API."""

    run_name: Optional[str] = None
    """A unique identifier for the current evaluation run.

    This name is used to identify the run in Weights & Biases.
    """

    enable_wandb: bool = False
    """Whether to enable Weights & Biases (wandb) logging.

    Currently, this is only supported for LM Harness evaluation.
    If True, wandb will be used for experiment tracking and visualization.
    After enabling, you must set the `WANDB_API_KEY` environment variable.
    Alternatively, you can use the `wandb login` command to authenticate.
    """

    output_dir: str = "output"
    """Where to write computed evaluations."""
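Below is a brief usage sketch (not part of the module above) showing one way to construct an EvaluationConfig programmatically. The keyword arguments passed to the nested params dataclasses (task_name, model_name, max_new_tokens) are assumptions about their constructors and may differ across Oumi versions; depending on the version, EvaluationTaskParams may also require an evaluation backend to be specified.

from oumi.core.configs.evaluation_config import EvaluationConfig
from oumi.core.configs.inference_engine_type import InferenceEngineType
from oumi.core.configs.params.evaluation_params import EvaluationTaskParams
from oumi.core.configs.params.generation_params import GenerationParams
from oumi.core.configs.params.model_params import ModelParams

# Assemble an evaluation run: which tasks to run, which model to evaluate,
# and how to generate responses when a task needs an inference step.
config = EvaluationConfig(
    tasks=[EvaluationTaskParams(task_name="mmlu")],   # assumed field name
    model=ModelParams(model_name="gpt2"),             # assumed field name
    generation=GenerationParams(max_new_tokens=128),  # assumed field name
    inference_engine=InferenceEngineType.NATIVE,
    run_name="example-eval-run",
    output_dir="output/example-eval",
)
print(config)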