Source code for oumi.utils.io_utils
# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from pathlib import Path
from typing import Any, Union
import jsonlines
[docs]
def load_json(filename: Union[str, Path]) -> Any:
    """Load and parse a JSON document from a file.

    The file is decoded as UTF-8. A leading byte-order mark (BOM), which some
    editors and Windows tools prepend, is skipped instead of being reported as
    a JSON syntax error.

    Args:
        filename: Path to the JSON file.

    Returns:
        Any: The parsed JSON value, i.e. whatever the document's top-level
        value decodes to (dict, list, str, int, float, bool, or None).

    Raises:
        FileNotFoundError: If the file doesn't exist.
        json.JSONDecodeError: If the file contains invalid JSON.
    """
    file_path = Path(filename)
    if not file_path.exists():
        raise FileNotFoundError(f"The file {filename} does not exist.")
    # "utf-8-sig" decodes exactly like "utf-8" except that it drops a leading
    # BOM; with plain "utf-8" json.load rejects such files outright.
    with file_path.open("r", encoding="utf-8-sig") as file:
        return json.load(file)
[docs]
def save_json(
    data: dict[str, Any], filename: Union[str, Path], indent: int = 2
) -> None:
    """Save data as a formatted, UTF-8 encoded JSON file.

    The data is serialized completely before the destination is opened, so a
    serialization failure never truncates an existing file or leaves a
    partially written one behind. Non-ASCII characters are written as-is
    rather than as ``\\uXXXX`` escapes.

    Args:
        data: The data to be saved as JSON.
        filename: Path where the JSON file will be saved. An existing file is
            overwritten.
        indent: Number of spaces for indentation. Defaults to 2.

    Raises:
        TypeError: If the data is not JSON serializable.
    """
    # Opening with mode "w" truncates the target immediately, so encoding must
    # succeed first; otherwise a TypeError would destroy the previous content.
    serialized = json.dumps(data, indent=indent, ensure_ascii=False)
    file_path = Path(filename)
    with file_path.open("w", encoding="utf-8") as file:
        file.write(serialized)
[docs]
def load_file(filename: Union[str, Path], encoding: str = "utf-8") -> str:
    """Read an entire text file and return its contents.

    Args:
        filename: Path to the file.
        encoding: Text encoding used to decode the file. Defaults to "utf-8".

    Returns:
        str: The full decoded content of the file.

    Raises:
        FileNotFoundError: If nothing exists at the given path.
    """
    source = Path(filename)
    if source.exists():
        with source.open("r", encoding=encoding) as handle:
            contents = handle.read()
        return contents
    raise FileNotFoundError(f"The file {filename} does not exist.")
[docs]
def get_oumi_root_directory() -> Path:
    """Return the root directory of the Oumi project.

    The location is derived from this module's own file path: the directory
    two levels above the file, resolved to an absolute path.

    Returns:
        Path: The absolute path to the Oumi project's root directory.
    """
    # Step up from this file to its containing directory, then once more.
    module_dir = Path(__file__).parent
    root_dir = module_dir.parent
    return root_dir.resolve()
[docs]
def load_jsonlines(filename: Union[str, Path]) -> list[dict[str, Any]]:
    """Read every record of a jsonlines file into a list.

    Args:
        filename: Path to the jsonlines file.

    Returns:
        List[Dict[str, Any]]: One entry per line of the file, in file order,
        each holding the JSON object decoded from that line.

    Raises:
        ValueError: If the path points to a directory.
        FileNotFoundError: If the path is not an existing file.
        jsonlines.InvalidLineError: If the file contains invalid JSON.
    """
    source = Path(filename)
    # Directories are reported separately from paths that do not exist.
    if source.is_dir():
        raise ValueError(
            f"Provided path is a directory, expected a file: '{filename}'."
        )
    if source.is_file():
        with jsonlines.open(source) as records:
            return list(records)
    raise FileNotFoundError(f"Provided path does not exist: '{filename}'.")
[docs]
def save_jsonlines(filename: Union[str, Path], data: list[dict[str, Any]]) -> None:
    """Write a list of dictionaries to a jsonlines file.

    Args:
        filename: Path to the jsonlines file to be created or overwritten.
        data: The dictionaries to write, each saved as one JSON object.

    Raises:
        IOError: If opening or writing the file fails. The underlying error
            is attached as the cause.
    """
    destination = Path(filename)
    try:
        with jsonlines.open(destination, mode="w") as sink:
            sink.write_all(data)
    except OSError as err:
        # Re-raise with the target path in the message, chaining the original.
        raise OSError(f"Error writing to file {filename}") from err