Source code for oumi.datasets.vision_language.pixmo_cap
# Copyright 2025 - Oumi## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.fromtyping_extensionsimportoverride# noqa: I001fromoumi.core.datasetsimportVisionLanguageSftDatasetfromoumi.core.registryimportregister_datasetfromoumi.core.types.conversationimport(ContentItem,Conversation,Message,Role,Type,)
@register_dataset("allenai/pixmo-cap")
class PixmoCapDataset(VisionLanguageSftDataset):
    """Vision-language SFT dataset wrapping `allenai/pixmo-cap`.

    Note: some image URLs in this dataset are known to return HTTP 404.
    """

    default_dataset = "allenai/pixmo-cap"

    @override
    def transform_conversation(self, example: dict) -> Conversation:
        """Convert one dataset example into a two-turn Conversation.

        The user turn holds the image URL taken from ``example["image_url"]``
        followed by a fixed text prompt; the assistant turn holds
        ``example["caption"]``. The dataset also exposes a "transcripts"
        column, which is not used yet.

        Args:
            example: A single dataset row; must contain the "image_url" and
                "caption" keys.

        Returns:
            A Conversation with the user message first and the assistant
            message second.
        """
        prompt = "Describe this image:"

        # Image item comes before the text prompt within the user turn.
        user_turn = Message(
            role=Role.USER,
            content=[
                ContentItem(type=Type.IMAGE_URL, content=example["image_url"]),
                ContentItem(type=Type.TEXT, content=prompt),
            ],
        )
        assistant_turn = Message(role=Role.ASSISTANT, content=example["caption"])

        return Conversation(messages=[user_turn, assistant_turn])