# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

import pandas as pd

from oumi.core.datasets import BaseSftDataset
from oumi.core.registry import register_dataset
from oumi.core.types.conversation import Conversation, Message

[docs]@register_dataset("allenai/WildChat-1M")classWildChatDataset(BaseSftDataset):"""Dataset class for the allenai/WildChat-1M dataset."""default_dataset="allenai/WildChat-1M"
[docs]deftransform_conversation(self,example:Union[dict,pd.Series])->Conversation:"""Transform a dataset example into a Conversation object."""raw_messages=example.get("conversation")ifraw_messagesisNone:raiseValueError("Invalid field, expected 'conversation'")messages=[Message.model_validate(message)formessageinraw_messages]returnConversation(messages=messages)