Source code for oumi.core.configs.params.profiler_params

# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from oumi.core.configs.params.base_params import BaseParams



[docs]
@dataclass
class ProfilerScheduleParams(BaseParams):
    """Parameters that define what subset of training steps to profile.

    Keeping profiling enabled for all training steps may be impractical
    as it may result in out-of-memory errors, extremely large trace files,
    and may interfere with regular training performance. This config can be used
    to enable PyTorch profiler only for a small number of training steps,
    which is not affected by such issues, and may still provide a useful signal
    for performance analysis.
    """

    enable_schedule: bool = False
    """Whether profiling schedule is enabled.

    If `False`, then profiling is enabled for the entire process
    duration, and all schedule parameters below will be ignored.
    """

    wait: int = 0
    """The number of training steps to skip at the beginning of
    each profiling cycle (`ProfilerAction.NONE`).
    Each cycle includes `wait + warmup + active` steps.
    """

    warmup: int = 1
    """The number of training steps to do profiling warmup (`ProfilerAction.WARMUP`)
    in each profiling cycle.
    """

    active: int = 3
    """The number of training steps to do active recording (`ProfilerAction.RECORD`)
    in each profiling cycle.
    """

    repeat: int = 1
    """The optional number of profiling cycles.

    Each cycle includes `wait + warmup + active` steps. The zero value means that
    the cycles will continue until the profiling is finished.
    """

    skip_first: int = 1
    """The number of initial training steps to skip at the beginning of profiling
    session (`ProfilerAction.NONE`).
    """

    def __post_init__(self):
        """Verifies params.

        Note that the checks run regardless of the value of `enable_schedule`.

        Raises:
            ValueError: If any of `wait`, `warmup`, `repeat`, `skip_first` is
                negative, or if `active` is not strictly positive.
        """
        # `active` is deliberately validated separately below so that a bad
        # `active` value is reported with its own, accurate error message.
        if not (
            self.wait >= 0
            and self.warmup >= 0
            and self.repeat >= 0
            and self.skip_first >= 0
        ):
            raise ValueError(
                "Invalid profiler schedule arguments. The parameters "
                f"wait: {self.wait}, warmup: {self.warmup}, repeat: {self.repeat}, "
                f"skip_first: {self.skip_first} must be non-negative."
            )
        if not (self.active > 0):
            raise ValueError(
                "Invalid profiler schedule arguments. The parameter "
                f"active: {self.active} must be positive."
            )





[docs]
@dataclass
class ProfilerParams(BaseParams):
    """Parameters that configure profiling: what to record and where to save it.

    Several fields correspond to `torch.profiler` options (see the individual
    attribute descriptions below).
    """

    save_dir: Optional[str] = None
    """Directory where the profiling data will be saved to.

    If not specified and profiling is enabled, then the `profiler` sub-dir will be
    used under `output_dir`.
    """

    enable_cpu_profiling: bool = False
    """Whether to profile CPU activity.

    Corresponds to `torch.profiler.ProfilerActivity.CPU`.
    """

    enable_cuda_profiling: bool = False
    """Whether to profile CUDA.

    Corresponds to `torch.profiler.ProfilerActivity.CUDA`.
    """

    record_shapes: bool = False
    """Save information about operator's input shapes."""

    profile_memory: bool = False
    """Track tensor memory allocation/deallocation."""

    with_stack: bool = False
    """Record source information (file and line number) for the ops."""

    with_flops: bool = False
    """Use formula to estimate the FLOPs (floating point operations) of
    specific operators (matrix multiplication and 2D convolution).
    """

    with_modules: bool = False
    """Record module hierarchy (including function names) corresponding to
    the callstack of the op.
    """

    row_limit: int = 50
    """Max number of rows to include into profiling report tables.

    Set to -1 to make it unlimited.
    """

    schedule: ProfilerScheduleParams = field(default_factory=ProfilerScheduleParams)
    """Parameters that define what subset of training steps to profile."""