# Source code for thesis.core.config.loaders
"""YAML configuration loaders and utilities."""
import copy
from pathlib import Path
from typing import cast

import yaml

from thesis.core.logging import get_logger
# Module-level logger, named after this module via ``__name__``.
logger = get_logger(__name__)

# Public names of this module; the helper ``_deep_merge`` is not listed.
__all__ = ["load_yaml", "save_yaml", "merge_configs"]

# Alias for a configuration mapping: string keys, values of any type.
ConfigDict = dict[str, object]
[docs]
def load_yaml(file_path: str | Path) -> ConfigDict:
    """
    Load a YAML configuration file.

    An empty document (or any other falsy top-level value) is returned as an
    empty dictionary. A non-empty document whose top-level value is not a
    mapping is rejected, because the rest of the configuration code expects
    dictionary semantics.

    Args:
        file_path: Path to YAML file

    Returns:
        Dictionary with config data

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If YAML is invalid, or if its top-level value is not a
            mapping

    Example:
        >>> config = load_yaml("./config/default.yaml")
        >>> print(config["preprocessing"]["threads"])
    """
    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"Config file not found: {file_path}")

    # Keep the try body down to the parse itself so that only YAML syntax
    # errors are translated into ValueError.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as e:
        logger.error(f"Error loading YAML from {file_path}: {e}")
        # Chain explicitly so the parser error is recorded as the cause.
        raise ValueError(f"Invalid YAML in {file_path}: {e}") from e

    if not config:
        # safe_load returns None for an empty document; normalise it (and any
        # other falsy value) to an empty config.
        config = {}
    elif not isinstance(config, dict):
        # Fail here rather than handing a list/scalar to callers that will
        # index it like a dictionary.
        message = (
            f"Invalid YAML in {file_path}: expected a mapping at the top level, "
            f"got {type(config).__name__}"
        )
        logger.error(message)
        raise ValueError(message)

    logger.debug(f"Loaded YAML config: {file_path}")
    return cast(ConfigDict, config)
[docs]
def save_yaml(config: ConfigDict, file_path: str | Path) -> None:
    """
    Save a configuration to a YAML file.

    Missing parent directories are created. The configuration is serialized
    in memory before the target file is opened, so a serialization failure
    does not truncate a file that already exists at ``file_path``.

    Args:
        config: Configuration dictionary to save
        file_path: Path where to save the file

    Raises:
        Exception: Any error raised while serializing or writing is logged
            and re-raised unchanged

    Example:
        >>> config = {"threads": 4}
        >>> save_yaml(config, "./my_config.yaml")
    """
    file_path = Path(file_path)
    file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Serialize first: opening with "w" truncates the file immediately,
        # so dumping straight into the stream would destroy the previous
        # contents if the config held a value safe_dump cannot represent.
        text = yaml.safe_dump(config, default_flow_style=False, sort_keys=False)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)
        logger.debug(f"Saved config to: {file_path}")
    except Exception as e:
        logger.error(f"Error saving YAML to {file_path}: {e}")
        raise
[docs]
def merge_configs(*configs: ConfigDict | None) -> ConfigDict:
    """
    Merge multiple configurations with later configs overriding earlier ones.

    Nested dictionaries are merged key by key; any other value in a later
    config replaces the earlier one. ``None`` entries are skipped. The
    returned dictionary is a deep copy, so modifying it never changes any of
    the input configurations.

    Args:
        *configs: Variable number of config dictionaries to merge

    Returns:
        Merged dictionary (empty if no usable config was given)

    Example:
        >>> base = {"a": 1, "b": {"x": 2}}
        >>> override = {"b": {"y": 3}, "c": 4}
        >>> merged = merge_configs(base, override)
        >>> merged["b"]
        {'x': 2, 'y': 3}
    """
    # Filter out None values
    valid_configs = [c for c in configs if c is not None]

    # Deep merge from left to right; zero or one config falls out naturally.
    merged: ConfigDict = {}
    for config in valid_configs:
        merged = _deep_merge(merged, config)

    if len(valid_configs) > 1:
        logger.debug(f"Merged {len(valid_configs)} configurations")

    # The merge only copies the dictionary levels it rebuilds, so untouched
    # nested values are still shared with the inputs. Detach them so a caller
    # editing the result cannot corrupt a base/default config.
    return copy.deepcopy(merged)
def _deep_merge(base: ConfigDict, override: ConfigDict) -> ConfigDict:
    """
    Recursively combine two dictionaries into a new one.

    Keys present in both whose values are both dictionaries are merged
    recursively; in every other case the value from ``override`` wins.
    Neither argument is modified at the top level.

    Args:
        base: Dictionary providing the starting values
        override: Dictionary whose entries take precedence

    Returns:
        New dictionary holding the combined entries
    """
    merged = base.copy()
    for name, incoming in override.items():
        current = merged.get(name)
        both_mappings = isinstance(current, dict) and isinstance(incoming, dict)
        if not both_mappings:
            # Missing key, or at least one side is not a dictionary: replace.
            merged[name] = incoming
            continue
        merged[name] = _deep_merge(
            cast(ConfigDict, current),
            cast(ConfigDict, incoming),
        )
    return merged