Source code for oumi.core.evaluation.evaluation_result
# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Optional
class EvaluationResult:
    """Class that retains the evaluation results generated by all backends.

    Attributes:
        task_name: The name of the task on which the model was evaluated.
        task_result: The result of evaluating on the task. This is a dictionary
            where the keys are metric names and the values are their
            corresponding values. The captured metrics vary based on the backend
            and the specific task.
        backend_config: The configuration of the backend. This is a dictionary
            including configuration parameters that are specific to the backend
            used to evaluate on the task. They are retained for reproducibility.
        start_time: A human-friendly string (recommended format is:
            YYYYMMDD_HHMMSS) that indicates the date and time when the
            evaluation started.
        elapsed_time_sec: The duration to complete the evaluation (in seconds).
    """

    def __init__(
        self,
        task_name: Optional[str] = None,
        task_result: Optional[dict[str, Any]] = None,
        backend_config: Optional[dict[str, Any]] = None,
        start_time: Optional[str] = None,
        elapsed_time_sec: Optional[int] = None,
    ):
        """Initialize the EvaluationResult class.

        Args:
            task_name: Name of the evaluated task.
            task_result: Raw result dictionary; a falsy value (``None`` or an
                empty dict) is stored as a fresh empty dict.
            backend_config: Backend configuration dictionary; a falsy value is
                stored as a fresh empty dict.
            start_time: String describing when the evaluation started.
            elapsed_time_sec: Evaluation duration in seconds.
        """
        self.task_name = task_name
        # `or {}` gives every instance its own dict instead of sharing a
        # mutable default across instances.
        self.task_result = task_result or {}
        self.backend_config = backend_config or {}
        self.start_time = start_time
        self.elapsed_time_sec = elapsed_time_sec

    def to_dict(self) -> dict[str, Any]:
        """Convert the EvaluationResult to a dictionary.

        Returns:
            A dictionary with one entry per attribute. The nested
            ``task_result`` and ``backend_config`` values are the instance's own
            dict objects, not copies.
        """
        return {
            "task_name": self.task_name,
            "task_result": self.task_result,
            "backend_config": self.backend_config,
            "start_time": self.start_time,
            "elapsed_time_sec": self.elapsed_time_sec,
        }

    def get_results(self) -> dict[str, Any]:
        """Retrieves the dict of results.

        Returns:
            An empty dict when no task result is stored; otherwise the entry
            found at ``task_result["results"][task_name]``.

        Raises:
            ValueError: If ``task_result`` is non-empty but has no ``"results"``
                key, if ``"results"`` does not hold a mapping, or if that
                mapping has no entry for ``task_name``.
        """
        # Imported locally so the method does not rely on a module-level import.
        from collections.abc import Mapping

        if not self.task_result:
            return {}

        if "results" not in self.task_result:
            raise ValueError("Unknown `task_result` format.")

        results = self.task_result["results"]
        # A non-mapping payload (None, a number, a list, ...) is a malformed
        # result: report it with the same ValueError as a missing key instead of
        # letting a TypeError escape from the membership test or the subscript.
        if not isinstance(results, Mapping) or self.task_name not in results:
            raise ValueError("Unknown `task_result` format.")

        return results[self.task_name]