"""
AdamOps Configuration Module
Provides centralized configuration management for the entire library.
Supports YAML, JSON, and environment variable configurations.
"""
import os
import json
from pathlib import Path
from typing import Any, Dict, Optional, Union
from dataclasses import dataclass, field
try:
import yaml
YAML_AVAILABLE = True
except ImportError:
YAML_AVAILABLE = False
try:
from dotenv import load_dotenv
DOTENV_AVAILABLE = True
except ImportError:
DOTENV_AVAILABLE = False
[docs]
@dataclass
class DataConfig:
"""Configuration for data module."""
default_encoding: str = "utf-8"
missing_threshold: float = 0.5
outlier_method: str = "iqr"
outlier_threshold: float = 1.5
validation_sample_size: int = 10000
auto_detect_types: bool = True
[docs]
@dataclass
class ModelConfig:
"""Configuration for model module."""
default_random_state: int = 42
cv_folds: int = 5
early_stopping_rounds: int = 50
n_jobs: int = -1
verbose: int = 0
[docs]
@dataclass
class AutoMLConfig:
"""Configuration for AutoML module."""
time_limit: int = 3600
max_trials: int = 100
tuning_method: str = "bayesian"
optimization_metric: str = "auto"
early_stopping: bool = True
[docs]
@dataclass
class DeploymentConfig:
"""Configuration for deployment module."""
default_port: int = 8000
default_host: str = "0.0.0.0"
api_framework: str = "fastapi"
enable_cors: bool = True
log_requests: bool = True
[docs]
@dataclass
class MonitoringConfig:
"""Configuration for monitoring module."""
drift_threshold: float = 0.05
alert_email: Optional[str] = None
check_interval: int = 3600
log_predictions: bool = True
[docs]
@dataclass
class LoggingConfig:
"""Configuration for logging."""
level: str = "INFO"
format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
file: Optional[str] = None
console: bool = True
max_bytes: int = 10485760 # 10MB
backup_count: int = 5
[docs]
@dataclass
class AdamOpsConfig:
"""Main configuration class for AdamOps."""
data: DataConfig = field(default_factory=DataConfig)
model: ModelConfig = field(default_factory=ModelConfig)
automl: AutoMLConfig = field(default_factory=AutoMLConfig)
deployment: DeploymentConfig = field(default_factory=DeploymentConfig)
monitoring: MonitoringConfig = field(default_factory=MonitoringConfig)
logging: LoggingConfig = field(default_factory=LoggingConfig)
# Registry settings
registry_backend: str = "json" # json or sqlite
registry_path: str = ".adamops_registry"
# General settings
cache_enabled: bool = True
cache_path: str = ".adamops_cache"
# Global configuration instance
_config: Optional[AdamOpsConfig] = None
[docs]
def get_config() -> AdamOpsConfig:
"""
Get the global configuration instance.
Returns:
AdamOpsConfig: The global configuration object.
Example:
>>> config = get_config()
>>> print(config.model.cv_folds)
5
"""
global _config
if _config is None:
_config = AdamOpsConfig()
return _config
[docs]
def set_config(config: AdamOpsConfig) -> None:
"""
Set the global configuration instance.
Args:
config: The configuration object to set as global.
Example:
>>> custom_config = AdamOpsConfig()
>>> custom_config.model.cv_folds = 10
>>> set_config(custom_config)
"""
global _config
_config = config
[docs]
def reset_config() -> None:
"""
Reset the global configuration to defaults.
Example:
>>> reset_config()
>>> config = get_config()
>>> print(config.model.cv_folds)
5
"""
global _config
_config = AdamOpsConfig()
[docs]
def load_config_from_file(filepath: Union[str, Path]) -> AdamOpsConfig:
"""
Load configuration from a YAML or JSON file.
Args:
filepath: Path to the configuration file.
Returns:
AdamOpsConfig: Loaded configuration object.
Raises:
FileNotFoundError: If the file doesn't exist.
ValueError: If the file format is not supported.
Example:
>>> config = load_config_from_file("config.yaml")
"""
filepath = Path(filepath)
if not filepath.exists():
raise FileNotFoundError(f"Configuration file not found: {filepath}")
with open(filepath, "r", encoding="utf-8") as f:
if filepath.suffix in [".yaml", ".yml"]:
if not YAML_AVAILABLE:
raise ImportError("PyYAML is required to load YAML config files. Install with: pip install pyyaml")
config_dict = yaml.safe_load(f)
elif filepath.suffix == ".json":
config_dict = json.load(f)
else:
raise ValueError(f"Unsupported config file format: {filepath.suffix}")
return _dict_to_config(config_dict)
[docs]
def save_config_to_file(config: AdamOpsConfig, filepath: Union[str, Path]) -> None:
"""
Save configuration to a YAML or JSON file.
Args:
config: Configuration object to save.
filepath: Path to save the configuration to.
Example:
>>> config = get_config()
>>> save_config_to_file(config, "config.yaml")
"""
filepath = Path(filepath)
filepath.parent.mkdir(parents=True, exist_ok=True)
config_dict = _config_to_dict(config)
with open(filepath, "w", encoding="utf-8") as f:
if filepath.suffix in [".yaml", ".yml"]:
if not YAML_AVAILABLE:
raise ImportError("PyYAML is required to save YAML config files. Install with: pip install pyyaml")
yaml.dump(config_dict, f, default_flow_style=False, indent=2)
elif filepath.suffix == ".json":
json.dump(config_dict, f, indent=2)
else:
raise ValueError(f"Unsupported config file format: {filepath.suffix}")
[docs]
def load_config_from_env(prefix: str = "ADAMOPS") -> AdamOpsConfig:
"""
Load configuration from environment variables.
Environment variables should be named as {prefix}_{SECTION}_{KEY}.
For example: ADAMOPS_MODEL_CV_FOLDS=10
Args:
prefix: Prefix for environment variables.
Returns:
AdamOpsConfig: Configuration with values from environment.
Example:
>>> # Set env: ADAMOPS_MODEL_CV_FOLDS=10
>>> config = load_config_from_env()
>>> print(config.model.cv_folds)
10
"""
if DOTENV_AVAILABLE:
load_dotenv()
config = AdamOpsConfig()
# Map of environment variable suffixes to config attributes
env_mappings = {
# Data config
f"{prefix}_DATA_DEFAULT_ENCODING": ("data", "default_encoding", str),
f"{prefix}_DATA_MISSING_THRESHOLD": ("data", "missing_threshold", float),
f"{prefix}_DATA_OUTLIER_METHOD": ("data", "outlier_method", str),
f"{prefix}_DATA_OUTLIER_THRESHOLD": ("data", "outlier_threshold", float),
# Model config
f"{prefix}_MODEL_RANDOM_STATE": ("model", "default_random_state", int),
f"{prefix}_MODEL_CV_FOLDS": ("model", "cv_folds", int),
f"{prefix}_MODEL_N_JOBS": ("model", "n_jobs", int),
# AutoML config
f"{prefix}_AUTOML_TIME_LIMIT": ("automl", "time_limit", int),
f"{prefix}_AUTOML_MAX_TRIALS": ("automl", "max_trials", int),
f"{prefix}_AUTOML_TUNING_METHOD": ("automl", "tuning_method", str),
# Deployment config
f"{prefix}_DEPLOY_PORT": ("deployment", "default_port", int),
f"{prefix}_DEPLOY_HOST": ("deployment", "default_host", str),
f"{prefix}_DEPLOY_FRAMEWORK": ("deployment", "api_framework", str),
# Monitoring config
f"{prefix}_MONITOR_DRIFT_THRESHOLD": ("monitoring", "drift_threshold", float),
f"{prefix}_MONITOR_CHECK_INTERVAL": ("monitoring", "check_interval", int),
# Logging config
f"{prefix}_LOG_LEVEL": ("logging", "level", str),
f"{prefix}_LOG_FILE": ("logging", "file", str),
# General settings
f"{prefix}_REGISTRY_BACKEND": (None, "registry_backend", str),
f"{prefix}_REGISTRY_PATH": (None, "registry_path", str),
}
for env_var, (section, attr, type_conv) in env_mappings.items():
value = os.environ.get(env_var)
if value is not None:
try:
converted_value = type_conv(value)
if section is not None:
setattr(getattr(config, section), attr, converted_value)
else:
setattr(config, attr, converted_value)
except (ValueError, TypeError):
pass # Skip invalid values
return config
def _config_to_dict(config: AdamOpsConfig) -> Dict[str, Any]:
"""Convert configuration object to dictionary."""
return {
"data": {
"default_encoding": config.data.default_encoding,
"missing_threshold": config.data.missing_threshold,
"outlier_method": config.data.outlier_method,
"outlier_threshold": config.data.outlier_threshold,
"validation_sample_size": config.data.validation_sample_size,
"auto_detect_types": config.data.auto_detect_types,
},
"model": {
"default_random_state": config.model.default_random_state,
"cv_folds": config.model.cv_folds,
"early_stopping_rounds": config.model.early_stopping_rounds,
"n_jobs": config.model.n_jobs,
"verbose": config.model.verbose,
},
"automl": {
"time_limit": config.automl.time_limit,
"max_trials": config.automl.max_trials,
"tuning_method": config.automl.tuning_method,
"optimization_metric": config.automl.optimization_metric,
"early_stopping": config.automl.early_stopping,
},
"deployment": {
"default_port": config.deployment.default_port,
"default_host": config.deployment.default_host,
"api_framework": config.deployment.api_framework,
"enable_cors": config.deployment.enable_cors,
"log_requests": config.deployment.log_requests,
},
"monitoring": {
"drift_threshold": config.monitoring.drift_threshold,
"alert_email": config.monitoring.alert_email,
"check_interval": config.monitoring.check_interval,
"log_predictions": config.monitoring.log_predictions,
},
"logging": {
"level": config.logging.level,
"format": config.logging.format,
"file": config.logging.file,
"console": config.logging.console,
"max_bytes": config.logging.max_bytes,
"backup_count": config.logging.backup_count,
},
"registry_backend": config.registry_backend,
"registry_path": config.registry_path,
"cache_enabled": config.cache_enabled,
"cache_path": config.cache_path,
}
def _dict_to_config(config_dict: Dict[str, Any]) -> AdamOpsConfig:
"""Convert dictionary to configuration object."""
config = AdamOpsConfig()
# Data config
if "data" in config_dict:
data = config_dict["data"]
config.data = DataConfig(
default_encoding=data.get("default_encoding", config.data.default_encoding),
missing_threshold=data.get("missing_threshold", config.data.missing_threshold),
outlier_method=data.get("outlier_method", config.data.outlier_method),
outlier_threshold=data.get("outlier_threshold", config.data.outlier_threshold),
validation_sample_size=data.get("validation_sample_size", config.data.validation_sample_size),
auto_detect_types=data.get("auto_detect_types", config.data.auto_detect_types),
)
# Model config
if "model" in config_dict:
model = config_dict["model"]
config.model = ModelConfig(
default_random_state=model.get("default_random_state", config.model.default_random_state),
cv_folds=model.get("cv_folds", config.model.cv_folds),
early_stopping_rounds=model.get("early_stopping_rounds", config.model.early_stopping_rounds),
n_jobs=model.get("n_jobs", config.model.n_jobs),
verbose=model.get("verbose", config.model.verbose),
)
# AutoML config
if "automl" in config_dict:
automl = config_dict["automl"]
config.automl = AutoMLConfig(
time_limit=automl.get("time_limit", config.automl.time_limit),
max_trials=automl.get("max_trials", config.automl.max_trials),
tuning_method=automl.get("tuning_method", config.automl.tuning_method),
optimization_metric=automl.get("optimization_metric", config.automl.optimization_metric),
early_stopping=automl.get("early_stopping", config.automl.early_stopping),
)
# Deployment config
if "deployment" in config_dict:
deploy = config_dict["deployment"]
config.deployment = DeploymentConfig(
default_port=deploy.get("default_port", config.deployment.default_port),
default_host=deploy.get("default_host", config.deployment.default_host),
api_framework=deploy.get("api_framework", config.deployment.api_framework),
enable_cors=deploy.get("enable_cors", config.deployment.enable_cors),
log_requests=deploy.get("log_requests", config.deployment.log_requests),
)
# Monitoring config
if "monitoring" in config_dict:
monitor = config_dict["monitoring"]
config.monitoring = MonitoringConfig(
drift_threshold=monitor.get("drift_threshold", config.monitoring.drift_threshold),
alert_email=monitor.get("alert_email", config.monitoring.alert_email),
check_interval=monitor.get("check_interval", config.monitoring.check_interval),
log_predictions=monitor.get("log_predictions", config.monitoring.log_predictions),
)
# Logging config
if "logging" in config_dict:
log = config_dict["logging"]
config.logging = LoggingConfig(
level=log.get("level", config.logging.level),
format=log.get("format", config.logging.format),
file=log.get("file", config.logging.file),
console=log.get("console", config.logging.console),
max_bytes=log.get("max_bytes", config.logging.max_bytes),
backup_count=log.get("backup_count", config.logging.backup_count),
)
# General settings
config.registry_backend = config_dict.get("registry_backend", config.registry_backend)
config.registry_path = config_dict.get("registry_path", config.registry_path)
config.cache_enabled = config_dict.get("cache_enabled", config.cache_enabled)
config.cache_path = config_dict.get("cache_path", config.cache_path)
return config
[docs]
def update_config(**kwargs) -> AdamOpsConfig:
"""
Update specific configuration values.
Args:
**kwargs: Configuration values in format section__key=value.
Returns:
AdamOpsConfig: Updated configuration object.
Example:
>>> config = update_config(model__cv_folds=10, automl__time_limit=7200)
>>> print(config.model.cv_folds)
10
"""
config = get_config()
for key, value in kwargs.items():
if "__" in key:
section, attr = key.split("__", 1)
if hasattr(config, section):
section_config = getattr(config, section)
if hasattr(section_config, attr):
setattr(section_config, attr, value)
elif hasattr(config, key):
setattr(config, key, value)
return config