Source code for oumi.analyze.testing.results

# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Test result models for the test engine."""

from typing import Any

from pydantic import BaseModel, Field

from oumi.core.configs.params.test_params import TestSeverity



[docs]
class TestResult(BaseModel):
    """Outcome record for one executed test.

    Attributes:
        test_id: Identifier of the test that produced this result.
        passed: ``True`` when the test passed.
        severity: Severity assigned to the test (defaults to ``MEDIUM``).
        title: Short human-readable name of the test.
        description: Explanation of what the test verifies.
        metric: Name of the metric under test (e.g., "analyzer_name.field").
        affected_count: How many samples failed the test.
        total_count: How many samples were tested.
        affected_percentage: Share of samples affected, as a percentage.
        threshold: Threshold configured for the test, if any.
        actual_value: Computed value (for threshold tests), if any.
        sample_indices: Indices of affected samples (limited).
        error: Error message when executing the test failed.
        details: Extra information attached to the result.
    """

    # --- Identity and verdict -------------------------------------------
    test_id: str
    passed: bool
    severity: TestSeverity = TestSeverity.MEDIUM

    # --- Human-readable metadata ----------------------------------------
    title: str = ""
    description: str = ""
    metric: str = ""

    # --- Sample-level statistics ----------------------------------------
    affected_count: int = 0
    total_count: int = 0
    affected_percentage: float = 0.0

    # --- Threshold comparison (``None`` when not set) -------------------
    threshold: float | None = None
    actual_value: float | None = None

    # --- Diagnostics ----------------------------------------------------
    # ``default_factory`` gives every instance its own list / dict.
    sample_indices: list[int] = Field(default_factory=list)
    error: str | None = None
    details: dict[str, Any] = Field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return this result as a dictionary (delegates to ``model_dump``)."""
        as_dict = self.model_dump()
        return as_dict





[docs]
class TestSummary(BaseModel):
    """Summary of all test results.

    Each result is classified into exactly one outcome bucket, and the
    counters and the ``get_*_results`` accessors use the same rules:

    * error:  ``error`` is a non-empty message (whatever ``passed`` says).
    * passed: ``passed`` is true and no error is set.
    * failed: ``passed`` is false and no error is set.

    Attributes:
        results: List of individual test results.
        total_tests: Total number of tests run.
        passed_tests: Number of tests that passed (without an error).
        failed_tests: Number of tests that failed (without an error).
        error_tests: Number of tests that had errors.
        pass_rate: Percentage of tests that passed, rounded to one decimal.
        high_severity_failures: Number of HIGH severity results with
            ``passed`` false (errored results with ``passed`` false included).
        medium_severity_failures: Same count for MEDIUM severity.
        low_severity_failures: Same count for LOW severity.
    """

    results: list[TestResult] = Field(default_factory=list)
    total_tests: int = 0
    passed_tests: int = 0
    failed_tests: int = 0
    error_tests: int = 0
    pass_rate: float = 0.0
    high_severity_failures: int = 0
    medium_severity_failures: int = 0
    low_severity_failures: int = 0

    @classmethod
    def from_results(cls, results: list[TestResult]) -> "TestSummary":
        """Create a summary from a list of test results.

        Args:
            results: List of test results.

        Returns:
            TestSummary with computed statistics. For an empty list every
            counter is 0 and ``pass_rate`` is 0.0.
        """
        total = len(results)
        # An errored result never counts as passed, even if ``passed`` is set.
        errors = sum(1 for r in results if r.error)
        passed = sum(1 for r in results if r.passed and not r.error)
        # Whatever is neither passed nor errored is a plain failure.
        failed = total - passed - errors

        def count_not_passed(severity: TestSeverity) -> int:
            # Counts every result with ``passed`` false at this severity,
            # including those that also carry an error.
            return sum(
                1 for r in results if not r.passed and r.severity == severity
            )

        return cls(
            results=results,
            total_tests=total,
            passed_tests=passed,
            failed_tests=failed,
            error_tests=errors,
            # Guard against division by zero when there are no results.
            pass_rate=round(100.0 * passed / total, 1) if total > 0 else 0.0,
            high_severity_failures=count_not_passed(TestSeverity.HIGH),
            medium_severity_failures=count_not_passed(TestSeverity.MEDIUM),
            low_severity_failures=count_not_passed(TestSeverity.LOW),
        )

    def get_passed_results(self) -> list[TestResult]:
        """Get all passed test results.

        Results that carry an error are excluded, matching how
        ``from_results`` computes ``passed_tests``; they are returned by
        ``get_error_results`` instead.
        """
        return [r for r in self.results if r.passed and not r.error]

    def get_failed_results(self) -> list[TestResult]:
        """Get all failed test results (not passed and without an error)."""
        return [r for r in self.results if not r.passed and not r.error]

    def get_error_results(self) -> list[TestResult]:
        """Get all test results with errors (non-empty ``error`` message)."""
        return [r for r in self.results if r.error]

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return self.model_dump()