palma.base package

palma.base package#

Submodules#

palma.base.engine module#

class palma.base.engine.BaseOptimizer(engine_parameters: dict)#

Bases: object

Attributes:

allow_splitter
best_model_
engine_parameters
problem
run_id
transformer_

Methods

allowing_splitter
optimize
start

property allow_splitter#

allowing_splitter(splitter)#

abstract property best_model_: None#

property engine_parameters: Dict#

abstract optimize(X: DataFrame, y: Series, splitter: ValidationStrategy = None) → None#

property problem#

property run_id: str#

start(project: Project)#

abstract property transformer_: None#

class palma.base.engine.FlamlOptimizer(engine_parameters: dict)#

Bases: BaseOptimizer

Attributes:

allow_splitter
best_model_
engine_parameters
problem
run_id
transformer_

Methods

allowing_splitter
optimize
start

property allow_splitter#

property best_model_: BaseEstimator#

optimize(X: DataFrame, y: DataFrame, splitter: ValidationStrategy = None) → None#

property transformer_#

palma.base.model module#

class palma.base.model.ModelEvaluation(estimator)#

Bases: object

Attributes:

components
id
unfit_estimator

Methods

add
fit

add(component, name=None)#

property components#

fit(project: Project)#

property id: str#

property unfit_estimator#

palma.base.model_selection module#

class palma.base.model_selection.ModelSelector(engine: str | BaseOptimizer, engine_parameters: Dict)#

Bases: object

Wrapper to optimizers selecting the best model for a Project.

The optimization can be launched with the start method. Once the optimization is done, the best model can be accessed as the best_model_ attribute.

Parameters:

- engine (str): Currently accepted values are “FlamlOptimizer” or “AutoSklearnOptimizer” (the latter is deprecated).
- engine_parameters (dict): parameters passed to the engine.

Attributes:

run_id

Methods

- start(project: Project): look for best model

property run_id: str#

start(project: Project)#

palma.base.project module#

class palma.base.project.Project(project_name: str, problem: str)#

Bases: object

Represents a machine learning project with various components and logging capabilities.

Parameters:

project_name (str): The name of the project.
problem (str): The description of the machine learning problem. Accepted values: “classification” or “regression”.

Attributes:

base_index (List[int]): List of base indices for the project.
components (dict): Dictionary containing project components.
date (datetime): The date and time when the project was created.
project_id (str): Unique identifier for the project.
is_started (bool): Indicates whether the project has been started.
problem (str): Description of the machine learning problem.
validation_strategy (ValidationStrategy): The validation strategy used in the project.
project_name (str): The name of the project.
study_name (str): The randomly generated study name.
X (pd.DataFrame): The feature data for the project.
y (pd.Series): The target variable for the project.

Methods

add(component: Component) -> None: Adds a component to the project.
start(X: pd.DataFrame, y: pd.Series, splitter, X_test=None, y_test=None, groups=None, **kwargs) -> None: Starts the project with the specified data and validation strategy.

property X: DataFrame#

add(component: Component) → None#

property components: dict#

property date: datetime#

property is_started: bool#

property problem: str#

property project_id: str#

property project_name: str#

start(X: DataFrame, y: Series, splitter, X_test=None, y_test=None, groups=None, **kwargs) → None#

property study_name: str#

property validation_strategy: ValidationStrategy#

property y: Series#

palma.base.splitting_strategy module#

class palma.base.splitting_strategy.ValidationStrategy(splitter: BaseShuffleSplit | BaseCrossValidator | List[tuple] | List[str], **kwargs)#

Bases: object

Validation strategy for a machine learning project.

Parameters:

- splitter (Union[BaseShuffleSplit, BaseCrossValidator, List[tuple], List[str]]): The data splitting strategy.

Attributes:

- test_index (np.ndarray): Index array for the test set.
- train_index (np.ndarray): Index array for the training set.
- indexes_val (list): List of indexes for validation sets.
- indexes_train_test (list): List containing tuples of training and test indexes.
- id: Unique identifier for the validation strategy.
- splitter: The data splitting strategy.

Methods

- __call__(X: pd.DataFrame, y: pd.Series, X_test: pd.DataFrame = None, y_test: pd.Series = None, groups=None, **kwargs): Applies the validation strategy to the provided data.

property groups#

property id#

property indexes_train_test: list#

property indexes_val: list#

property splitter#

property test_index: ndarray#

property train_index: ndarray#

palma.base package

Contents

palma.base package#

Submodules#

palma.base.engine module#

palma.base.model module#

palma.base.model_selection module#

palma.base.project module#

palma.base.splitting_strategy module#

Module contents#