Source code for oumi.core.configs.params.profiler_params

# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from typing import Optional

from oumi.core.configs.params.base_params import BaseParams


@dataclass
class ProfilerScheduleParams(BaseParams):
    """Parameters that define what subset of training steps to profile.

    Keeping profiling enabled for all training steps may be impractical as it
    may result in out-of-memory errors, extremely large trace files, and may
    interfere with regular training performance. This config can be used to
    enable the PyTorch profiler for only a small number of training steps,
    which avoids such issues and may still provide a useful signal for
    performance analysis.
    """

    enable_schedule: bool = False
    """Whether the profiling schedule is enabled.

    If `False`, then profiling is enabled for the entire process duration,
    and all schedule parameters below will be ignored.
    """

    wait: int = 0
    """The number of training steps to skip at the beginning of each profiling
    cycle (`ProfilerAction.NONE`).

    Each cycle includes `wait + warmup + active` steps.
    """

    warmup: int = 1
    """The number of training steps to do profiling warmup
    (`ProfilerAction.WARMUP`) in each profiling cycle.
    """

    active: int = 3
    """The number of training steps to do active recording
    (`ProfilerAction.RECORD`) in each profiling cycle.
    """

    repeat: int = 1
    """The optional number of profiling cycles.

    Each cycle includes `wait + warmup + active` steps. A value of zero means
    that cycles will continue until profiling is finished.
    """

    skip_first: int = 1
    """The number of initial training steps to skip at the beginning of a
    profiling session (`ProfilerAction.NONE`).
    """
    def __post_init__(self):
        """Verifies params."""
        if not (
            self.wait >= 0
            and self.warmup >= 0
            and self.repeat >= 0
            and self.skip_first >= 0
        ):
            raise ValueError(
                "Invalid profiler schedule arguments. The parameters "
                f"wait: {self.wait}, warmup: {self.warmup}, "
                f"repeat: {self.repeat}, skip_first: {self.skip_first} "
                "must be non-negative."
            )
        if not (self.active > 0):
            raise ValueError(
                "Invalid profiler schedule arguments. The parameter "
                f"active: {self.active} must be positive."
            )
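
# Illustration (a sketch, not part of the original module): the schedule
# parameters above are designed to map directly onto `torch.profiler.schedule`.
# The helper name `build_torch_profiler_schedule` is hypothetical and shown
# only for reference.
def build_torch_profiler_schedule(params: ProfilerScheduleParams):
    """Builds a `torch.profiler.schedule` callable from schedule params (sketch)."""
    import torch.profiler  # Local import: torch is assumed to be installed.

    return torch.profiler.schedule(
        wait=params.wait,
        warmup=params.warmup,
        active=params.active,
        repeat=params.repeat,
        skip_first=params.skip_first,
    )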
@dataclass
class ProfilerParams(BaseParams):
    save_dir: Optional[str] = None
    """Directory where the profiling data will be saved to.

    If not specified and profiling is enabled, then the `profiler` sub-dir
    will be used under `output_dir`.
    """

    enable_cpu_profiling: bool = False
    """Whether to profile CPU activity.

    Corresponds to `torch.profiler.ProfilerActivity.CPU`.
    """

    enable_cuda_profiling: bool = False
    """Whether to profile CUDA.

    Corresponds to `torch.profiler.ProfilerActivity.CUDA`.
    """

    record_shapes: bool = False
    """Save information about operators' input shapes."""

    profile_memory: bool = False
    """Track tensor memory allocation/deallocation."""

    with_stack: bool = False
    """Record source information (file and line number) for the ops."""

    with_flops: bool = False
    """Use a formula to estimate the FLOPs (floating point operations) of
    specific operators (matrix multiplication and 2D convolution).
    """

    with_modules: bool = False
    """Record the module hierarchy (including function names) corresponding
    to the callstack of the op.
    """

    row_limit: int = 50
    """Max number of rows to include in profiling report tables.

    Set to -1 to make it unlimited.
    """

    schedule: ProfilerScheduleParams = field(default_factory=ProfilerScheduleParams)
    """Parameters that define what subset of training steps to profile."""
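
# Illustration (a sketch, not part of the original module): how `ProfilerParams`
# would typically translate into `torch.profiler.profile` keyword arguments.
# The helper name `build_torch_profiler` is hypothetical, and it reuses the
# hypothetical `build_torch_profiler_schedule` sketch above.
def build_torch_profiler(params: ProfilerParams):
    """Creates a `torch.profiler.profile` context manager from params (sketch)."""
    import torch.profiler  # Local import: torch is assumed to be installed.

    activities = []
    if params.enable_cpu_profiling:
        activities.append(torch.profiler.ProfilerActivity.CPU)
    if params.enable_cuda_profiling:
        activities.append(torch.profiler.ProfilerActivity.CUDA)

    # Only apply a step schedule when it's enabled; otherwise profile the
    # entire process duration, as documented in `ProfilerScheduleParams`.
    schedule = (
        build_torch_profiler_schedule(params.schedule)
        if params.schedule.enable_schedule
        else None
    )

    return torch.profiler.profile(
        activities=activities,
        schedule=schedule,
        record_shapes=params.record_shapes,
        profile_memory=params.profile_memory,
        with_stack=params.with_stack,
        with_flops=params.with_flops,
        with_modules=params.with_modules,
    )


# Hypothetical usage, stepping the profiler once per training step:
#
#     params = ProfilerParams(
#         enable_cpu_profiling=True,
#         schedule=ProfilerScheduleParams(enable_schedule=True),
#     )
#     with build_torch_profiler(params) as prof:
#         for batch in loader:
#             train_step(batch)  # `train_step` and `loader` are placeholders.
#             prof.step()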