Source code for oumi.analyze

# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analyzer framework for dataset analysis."""

from oumi.analyze.analyzers import (
    LengthAnalyzer,
    LengthMetrics,
    TurnStatsAnalyzer,
    TurnStatsMetrics,
)
from oumi.analyze.base import (
    BaseAnalyzer,
    ConversationAnalyzer,
    DatasetAnalyzer,
    MessageAnalyzer,
    PreferenceAnalyzer,
)
from oumi.analyze.config import (
    AnalyzerConfig,
    TypedAnalyzeConfig,
)
from oumi.analyze.discovery import (
    describe_analyzer,
    get_analyzer_info,
    list_available_metrics,
    print_analyzer_metrics,
)
from oumi.analyze.pipeline import AnalysisPipeline
from oumi.analyze.testing import TestEngine, TestResult, TestSummary
from oumi.analyze.utils.dataframe import to_analysis_dataframe
from oumi.core.registry import (
    REGISTRY,
)
from oumi.core.registry import (
    register_sample_analyzer as register_analyzer,
)


[docs] def get_analyzer_class(name: str) -> type | None: """Get an analyzer class by name. Args: name: Name of the analyzer. Returns: The analyzer class or None if not found. """ from typing import cast result = REGISTRY.get_sample_analyzer(name) return cast(type | None, result)
[docs] def create_analyzer_from_config( analyzer_id: str, params: dict, ) -> "MessageAnalyzer | ConversationAnalyzer | DatasetAnalyzer | None": """Create an analyzer instance from configuration. Args: analyzer_id: Analyzer type identifier. params: Analyzer-specific parameters. Returns: Analyzer instance or None if not found. """ import logging logger = logging.getLogger(__name__) analyzer_class = REGISTRY.get_sample_analyzer(analyzer_id) if analyzer_class is None: logger.warning(f"Unknown analyzer: {analyzer_id}") return None try: return analyzer_class(**params) except Exception as e: logger.error(f"Failed to create analyzer {analyzer_id}: {e}") return None
__all__ = [ "BaseAnalyzer", "MessageAnalyzer", "ConversationAnalyzer", "DatasetAnalyzer", "PreferenceAnalyzer", "LengthAnalyzer", "LengthMetrics", "TurnStatsAnalyzer", "TurnStatsMetrics", "AnalysisPipeline", "to_analysis_dataframe", "TypedAnalyzeConfig", "AnalyzerConfig", "list_available_metrics", "print_analyzer_metrics", "get_analyzer_info", "describe_analyzer", "register_analyzer", "get_analyzer_class", "create_analyzer_from_config", "TestEngine", "TestResult", "TestSummary", ]