oumi.core.collators#

Submodules#

oumi.core.collators.text_collator_with_padding module#

class oumi.core.collators.text_collator_with_padding.TextCollatorWithPadding(tokenizer: PreTrainedTokenizerBase, *, max_length: int | None, truncation: bool = False, label_ignore_index: int | None = None)[source]#

Bases: object

__call__(batch) → dict[str, Any][source]#

Pads to the longest length present in the batch.

Parameters:

batch – List of batch items.

Returns:

Processed batch.

Return type:

Dict[str, torch.Tensor]

oumi.core.collators.text_completions_collator_with_padding module#

class oumi.core.collators.text_completions_collator_with_padding.TextCompletionsCollatorWithPadding(tokenizer: PreTrainedTokenizerBase, instruction_prefix: str, response_prefix: str)[source]#

Bases: object

__call__(batch) → dict[str, Any][source]#

Pads to the longest length present in the batch.

Parameters:

batch – List of batch items.

Returns:

Processed batch.

Return type:

Dict[str, torch.Tensor]

oumi.core.collators.vision_language_collator_with_padding module#

class oumi.core.collators.vision_language_collator_with_padding.VisionLanguageCollatorWithPadding(tokenizer: PreTrainedTokenizerBase, *, max_length: int | None, truncation: bool = False, label_ignore_index: int | None = None)[source]#

Bases: object

__call__(batch) → dict[str, Any][source]#

Custom collator for multi-modal vision-language training.

Parameters:

batch – List of batch items.

Returns:

Processed batch.

Return type:

Dict[str, torch.Tensor]

collate_images(images) → Tensor[source]#

Collate images for multi-modal training.

Parameters:

images – List of images to collate.

Returns:

Batch of processed images.

Return type:

torch.Tensor