palma.components package#
Submodules#
palma.components.base module#
- class palma.components.base.Component#
Bases: object
palma.components.checker module#
palma.components.dashboard module#
- class palma.components.dashboard.ExplainerDashboard(dashboard_config: str | Dict = '/home/runner/work/palma/palma/palma/components/configuration_materials/default_dashboard.yaml', n_sample: int | None = None)#
Bases: Component

Methods

__call__(project, model)
    This function returns a dashboard instance.
update_config(dict_value)
    Update specific parameters from the actual configuration.
- update_config(dict_value: Dict[str, Dict])#
Update specific parameters from the actual configuration.
- Parameters:
- dict_value: dict
- explainer_parameters: dict
Parameters to be used in explainerdashboard.RegressionExplainer or explainerdashboard.ClassifierExplainer.
- dashboard_parameters: dict
Parameters used to compose dashboard tabs, items or themes for explainerdashboard.ExplainerDashboard. Tabs and components of the dashboard can be hidden; see the customize dashboard section for more detail.
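For illustration, update_config accepts a nested dictionary whose values override the matching entries of the current configuration. The sketch below shows the kind of recursive merge involved; the helper name deep_update and the configuration keys' values are hypothetical, not taken from palma's implementation:

```python
def deep_update(config: dict, dict_value: dict) -> dict:
    """Recursively merge dict_value into config, overriding leaf values."""
    for key, value in dict_value.items():
        if isinstance(value, dict) and isinstance(config.get(key), dict):
            deep_update(config[key], value)
        else:
            config[key] = value
    return config

config = {
    "explainer_parameters": {"shap": "guess", "n_jobs": 1},
    "dashboard_parameters": {"title": "Model explainer"},
}
# Override a single explainer parameter; untouched keys are kept intact
deep_update(config, {"explainer_parameters": {"n_jobs": 4}})
```

Only the leaves named in the update dictionary change; sibling keys such as "title" above survive the merge.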
palma.components.data_checker module#
palma.components.data_profiler module#
- class palma.components.data_profiler.ProfilerYData(**config)#
Bases: ProjectComponent

Methods

__call__(project)
    Call self as a function.
palma.components.logger module#
- class palma.components.logger.DummyLogger(uri: str, **kwargs)#
Bases: Logger
- Attributes:
- uri
Methods
log_artifact
log_metrics
log_params
log_project
- log_artifact(obj, path: str) -> None#
- log_metrics(metrics: dict, path: str) -> None#
- log_params(parameters: dict, path: str) -> None#
- class palma.components.logger.FileSystemLogger(uri: str = '/tmp', **kwargs)#
Bases: Logger
A logger for saving artifacts and metadata to the file system.
- Parameters:
- uri: str, optional
The root path or directory where artifacts and metadata will be saved. Defaults to the system temporary directory.
- **kwargs: dict
Additional keyword arguments to pass to the base logger.
- Attributes:
- path_project: str
The path to the project directory.
- path_study: str
The path to the study directory within the project.

Methods

log_project(project: Project) -> None
    Performs the first level of backup by creating folders and saving an instance of Project.
log_metrics(metrics: dict, path: str) -> None
    Saves metrics in JSON format at the specified path.
log_artifact(obj, path: str) -> None
    Saves an artifact at the specified path, handling different types of objects.
log_params(parameters: dict, path: str) -> None
    Saves model parameters in JSON format at the specified path.
- log_artifact(obj, path: str) -> None#
Logs an artifact, handling different types of objects.
- Parameters:
- obj: any
The artifact to be logged.
- path: str
The relative path (from the study directory) where the artifact will be saved.
- log_metrics(metrics: dict, path: str) -> None#
Logs metrics to a JSON file.
- Parameters:
- metrics: dict
The metrics to be logged.
- path: str
The relative path (from the study directory) where the metrics JSON file will be saved.
- log_params(parameters: dict, path: str) -> None#
Logs model parameters to a JSON file.
- Parameters:
- parameters: dict
The model parameters to be logged.
- path: str
The relative path (from the study directory) where the parameters JSON file will be saved.
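As a rough sketch of what log_metrics conceptually does, metrics can be serialized as JSON under a root directory, creating intermediate folders as needed. This is an illustration under that assumption, not palma's actual implementation:

```python
import json
import tempfile
from pathlib import Path

def log_metrics(metrics: dict, path: str, root: str) -> None:
    """Save metrics as a JSON file under root/path, creating parent dirs."""
    target = Path(root) / path
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(metrics, indent=2))

root = tempfile.mkdtemp()
log_metrics({"accuracy": 0.92, "f1": 0.88}, "study/metrics.json", root)

# Read the file back to check the round trip
saved = json.loads((Path(root) / "study/metrics.json").read_text())
```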
- class palma.components.logger.Logger(uri: str, **kwargs)#
Bases: object

Logger is an abstract class that defines a common interface for a set of Logger subclasses.
It provides the methods shared by all subclasses, making it possible for a user to create a custom subclass compatible with the rest of the components.
- Attributes:
- uri
Methods
log_artifact
log_metrics
log_params
log_project
- abstract log_artifact(**kwargs) -> None#
- abstract log_metrics(metrics: dict, path: str) -> None#
- abstract log_params(**kwargs) -> None#
- property uri#
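A custom logger can be written by subclassing this interface. The sketch below mimics the abstract base with abc; the Logger class here is a stand-in for palma's, and InMemoryLogger is a hypothetical subclass used purely for illustration:

```python
from abc import ABC, abstractmethod

class Logger(ABC):
    """Stand-in for palma.components.logger.Logger."""
    def __init__(self, uri: str, **kwargs):
        self._uri = uri

    @property
    def uri(self) -> str:
        return self._uri

    @abstractmethod
    def log_metrics(self, metrics: dict, path: str) -> None: ...

    @abstractmethod
    def log_params(self, **kwargs) -> None: ...

    @abstractmethod
    def log_artifact(self, **kwargs) -> None: ...

class InMemoryLogger(Logger):
    """Hypothetical subclass that keeps every record in a dict."""
    def __init__(self, uri: str, **kwargs):
        super().__init__(uri, **kwargs)
        self.records = {}

    def log_metrics(self, metrics: dict, path: str) -> None:
        self.records[path] = metrics

    def log_params(self, **kwargs) -> None:
        self.records.setdefault("params", {}).update(kwargs)

    def log_artifact(self, **kwargs) -> None:
        self.records.setdefault("artifacts", []).append(kwargs)

logger = InMemoryLogger(uri="memory://")
logger.log_metrics({"rmse": 1.3}, "metrics.json")
```

Implementing the three abstract methods is enough for the subclass to be instantiated and passed wherever a Logger is expected.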
- class palma.components.logger.MLFlowLogger(uri: str, artifact_location: str = '.mlruns')#
Bases: Logger
MLFlowLogger class for logging experiments using MLflow.
- Parameters:
- uri: str
The URI for the MLflow tracking server.
- artifact_location: str
The location on the file system where artifacts are saved.
- Raises:
- ImportError: If mlflow is not installed.
- Attributes:
- tmp_logger: FileSystemLogger
Temporary logger for local logging before MLflow logging.
Methods
log_project(project: ‘Project’) -> None:
Logs the project information to MLflow, including project name and parameters.
log_metrics(metrics: dict[str, typing.Any]) -> None:
Logs metrics to MLflow.
log_artifact(artifact: dict, path) -> None:
Logs artifacts to MLflow using the temporary logger.
log_params(params: dict) -> None:
Logs parameters to MLflow.
log_model(model, path) -> None:
Logs the model to MLflow using the temporary logger.
- log_artifact(artifact: dict, path) -> None#
- log_metrics(metrics: dict[str, Any], path=None) -> None#
- log_params(params: dict) -> None#
- palma.components.logger.set_logger(_logger) -> None#
- Parameters:
- _logger: Logger
Define the logger to use.
>>> from palma import logger, set_logger
>>> from palma.components import FileSystemLogger
>>> from palma.components import MLFlowLogger
>>> set_logger(MLFlowLogger(uri="."))
>>> set_logger(FileSystemLogger(uri="."))
palma.components.performance module#
- class palma.components.performance.Analyser(on)#
Bases: ModelComponent
Analyser class for performing analysis on a model.
- Parameters:
- on: str
The type of analysis to perform. Possible values are “indexes_train_test” or “indexes_val”.
- Attributes:
- metrics
Methods

__call__(project, model)
    Call self as a function.
compute_metrics(metric)
    Compute the specified metrics for each estimator.
get_test_metrics()
    Get the computed metrics for the test set.
get_train_metrics()
    Get the computed metrics for the training set.
plot_variable_importance([mode, color, cmap])
    Plot the variable importance.
variable_importance()
    Compute the feature importance for each estimator.
- compute_metrics(metric: dict)#
Compute the specified metrics for each estimator.
- Parameters:
- metric: dict
Dictionary containing the metric name as key and the metric function as value.
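The metric dictionary maps a name to a callable taking true and predicted values. The sketch below illustrates the shape of such a dictionary with plain Python functions standing in for sklearn-style scorers; the names accuracy and error_rate are illustrative, not required:

```python
def accuracy(y_true, y_pred):
    """Fraction of matching labels."""
    return sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)

def error_rate(y_true, y_pred):
    """Complement of accuracy."""
    return 1 - accuracy(y_true, y_pred)

# Metric name -> metric function, the mapping compute_metrics expects
metric = {"accuracy": accuracy, "error_rate": error_rate}

y_true = [1, 0, 1, 1]
y_pred = [1, 0, 0, 1]
scores = {name: fn(y_true, y_pred) for name, fn in metric.items()}
```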
- get_test_metrics() -> DataFrame#
Get the computed metrics for the test set.
- Returns:
- pd.DataFrame
DataFrame containing the computed metrics for the test set.
- get_train_metrics() -> DataFrame#
Get the computed metrics for the training set.
- Returns:
- pd.DataFrame
DataFrame containing the computed metrics for the training set.
- property metrics#
- plot_variable_importance(mode='minmax', color='darkblue', cmap='flare', **kwargs)#
Plot the variable importance.
- Parameters:
- mode: str, optional
The mode for plotting the variable importance, by default "minmax".
- color: str, optional
The color for the plot, by default "darkblue".
- cmap: str, optional
The colormap for the plot, by default "flare".
- variable_importance()#
Compute the feature importance for each estimator.
- Returns:
- feature_importance: pandas.DataFrame
DataFrame containing the feature importance values for each estimator.
- class palma.components.performance.PermutationFeatureImportance(n_repeat: int = 5, random_state: int = 42, n_job: int = 2, scoring: str | None = None, max_samples: int | float = 0.7, color: str = 'darkblue')#
Bases: ModelComponent

Class for computing permutation feature importance.
- Parameters:
- n_repeat: int
The number of times to permute a feature.
- random_state: int
The pseudo-random number generator to control the permutations of each feature.
- n_job: int
The number of jobs to run in parallel. If n_job = -1, all processors are used.
- max_samples: int or float
The number of samples to draw from X to compute feature importance in each repeat (without replacement). If int, then draw max_samples samples. If float, then draw max_samples * X.shape[0] samples.
- color: str
The color for bar plot.
Methods
plot_permutation_feature_importance()
    Plot the permutation feature importance (computed on the training set only).
- plot_permutation_feature_importance()#
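Conceptually, permutation importance measures how much a model's score drops when one feature's values are shuffled, averaged over several repeats. The sketch below illustrates the idea on a toy model in plain Python; the score function and the lambda model are hypothetical stand-ins, not palma's implementation:

```python
import random

def score(model, X, y):
    """Negative mean squared error (higher is better)."""
    preds = [model(row) for row in X]
    return -sum((p - t) ** 2 for p, t in zip(preds, y)) / len(y)

def permutation_importance(model, X, y, feature_idx, n_repeat=5, seed=42):
    """Average score drop after shuffling one feature column."""
    rng = random.Random(seed)
    baseline = score(model, X, y)
    drops = []
    for _ in range(n_repeat):
        column = [row[feature_idx] for row in X]
        rng.shuffle(column)  # permute just this feature
        X_perm = [row[:feature_idx] + [v] + row[feature_idx + 1:]
                  for row, v in zip(X, column)]
        drops.append(baseline - score(model, X_perm, y))
    return sum(drops) / n_repeat

# Toy "model": the target depends on feature 0 only
model = lambda row: 2 * row[0]
X = [[1.0, 5.0], [2.0, 3.0], [3.0, 8.0], [4.0, 1.0]]
y = [2.0, 4.0, 6.0, 8.0]
imp_used = permutation_importance(model, X, y, feature_idx=0)
imp_unused = permutation_importance(model, X, y, feature_idx=1)
```

Shuffling the unused feature leaves the score untouched, so its importance is zero, while the informative feature shows a strictly positive drop.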
- class palma.components.performance.RegressionAnalysis(on)#
Bases: Analyser
Analyser class for performing analysis on a regression model.
- Parameters:
- on: str
The type of analysis to perform. Possible values are “indexes_train_test” or “indexes_val”.
- Attributes:
- _hidden_metrics: dict
Dictionary to store additional metrics that are not displayed.
Methods
variable_importance()
Compute the feature importance for each estimator.
compute_metrics(metric: dict)
Compute the specified metrics for each estimator.
get_train_metrics() -> pd.DataFrame
Get the computed metrics for the training set.
get_test_metrics() -> pd.DataFrame
Get the computed metrics for the test set.
plot_variable_importance(mode=”minmax”, color=”darkblue”, cmap=”flare”)
Plot the variable importance.
plot_prediction_versus_real
Plot prediction versus real values
plot_errors_pairgrid
Plot pair grid errors
- compute_predictions_errors(fun=None)#
- plot_errors_pairgrid(fun=None, number_percentiles=4, palette='rocket_r', features=None)#
- plot_prediction_versus_real(colormap=<matplotlib.colors.LinearSegmentedColormap object>)#
- class palma.components.performance.ScoringAnalysis(on)#
Bases: Analyser

The ScoringAnalysis class provides methods for analyzing the performance of a machine learning model.
- Attributes:
- metrics
- threshold
Methods

__call__(project, model)
    Call self as a function.
compute_metrics(metric)
    Compute the specified metrics for each estimator.
compute_threshold([method, value, metric])
    Compute the threshold using various heuristics.
confusion_matrix([in_percentage])
    Compute the confusion matrix.
get_test_metrics()
    Get the computed metrics for the test set.
get_train_metrics()
    Get the computed metrics for the training set.
plot_roc_curve([plot_method, plot_train, c, ...])
    Plot the ROC curve.
plot_threshold(**plot_kwargs)
    Plot the threshold on fpr/tpr axes.
plot_variable_importance([mode, color, cmap])
    Plot the variable importance.
variable_importance()
    Compute the feature importance for each estimator.
- compute_threshold(method: str = 'total_population', value: float = 0.5, metric: Callable | None = None)#
Compute the threshold using various heuristics.
- Parameters:
- method: str, optional
The method used to compute the threshold, by default "total_population":
- "total_population": compute the threshold so that the percentage of positive predictions is equal to value
- "fpr": compute the threshold so that the false positive rate is equal to value
- "optimize_metric": compute the threshold so that the metric is optimized; the value parameter is ignored and the metric parameter must be provided
- value: float, optional
The value to use for the threshold computation, by default 0.5
- metric: typing.Callable, optional
The metric function to use for the threshold computation, by default None
- Returns:
- float
The computed threshold
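The "total_population" heuristic can be sketched as: sort the predicted probabilities and pick the cutoff so that the desired fraction of predictions lands above it. The standalone function below is an illustration of that idea, not palma's implementation:

```python
def threshold_total_population(probas, value=0.5):
    """Threshold such that a `value` fraction of predictions are positive."""
    ranked = sorted(probas, reverse=True)
    n_positive = int(round(value * len(ranked)))
    if n_positive == 0:
        return 1.0  # nothing predicted positive
    # The n-th highest probability becomes the cutoff (inclusive)
    return ranked[n_positive - 1]

probas = [0.9, 0.8, 0.6, 0.4, 0.3, 0.1]
t = threshold_total_population(probas, value=0.5)
positives = sum(p >= t for p in probas)  # half of the population
```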
- confusion_matrix(in_percentage=False)#
Compute the confusion matrix.
- Parameters:
- in_percentage: bool, optional
Whether to return the confusion matrix in percentage, by default False
- Returns:
- pandas.DataFrame
The confusion matrix
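A confusion matrix in percentage terms simply normalizes the raw counts by the total number of samples. The sketch below shows the computation for binary labels with plain Python lists, whereas the method itself returns a pandas.DataFrame:

```python
def confusion_matrix(y_true, y_pred, in_percentage=False):
    """2x2 matrix [[TN, FP], [FN, TP]] for binary 0/1 labels."""
    counts = [[0, 0], [0, 0]]
    for t, p in zip(y_true, y_pred):
        counts[t][p] += 1
    if in_percentage:
        total = len(y_true)
        counts = [[100 * c / total for c in row] for row in counts]
    return counts

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]
cm = confusion_matrix(y_true, y_pred, in_percentage=True)
```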
- plot_roc_curve(plot_method='mean', plot_train: bool = False, c='C0', cmap: str = 'inferno', label: str = '', mode: str = 'std', label_iter: iter | None = None, plot_base: bool = True, **kwargs)#
Plot the ROC curve.
- Parameters:
- plot_method: str
Select the type of plot for the ROC curve:
- "mean" (default): plot the mean ROC curve
- "beam": plot all the curves using shades
- "all": plot each ROC curve separately
- plot_train: bool
If True, the train ROC curves will also be plotted; default False.
- c: str
Sets the color of the ROC curve; not used when plot_method="all".
- cmap: str
- label
- mode
- label_iter
- plot_base: bool
Plot the basic ROC curve helpers.
- kwargs:
Deprecated.
- Returns:
- plot_threshold(**plot_kwargs)#
Plot the threshold on fpr/tpr axes
- Parameters:
- plot_kwargs: dict, optional
Additional keyword arguments to pass to the scatter plot function
- Returns:
- matplotlib.pyplot
The threshold plot
- property threshold#
- class palma.components.performance.ShapAnalysis(on, n_shap, compute_interaction=False)#
Bases: Analyser
- Attributes:
- metrics
Methods

__call__(project, model)
    Call self as a function.
compute_metrics(metric)
    Compute the specified metrics for each estimator.
get_test_metrics()
    Get the computed metrics for the test set.
get_train_metrics()
    Get the computed metrics for the training set.
plot_variable_importance([mode, color, cmap])
    Plot the variable importance.
variable_importance()
    Compute the feature importance for each estimator.
plot_shap_decision_plot
plot_shap_interaction
plot_shap_summary_plot
- plot_shap_decision_plot(**kwargs)#
- plot_shap_interaction(feature_x, feature_y)#
- plot_shap_summary_plot()#