palma.base package#

Submodules#

palma.base.engine module#

class palma.base.engine.BaseOptimizer(engine_parameters: dict)#

Bases: object

Attributes:
allow_splitter
engine_parameters
estimator_
optimizer
transformer_

Methods

allowing_splitter

optimize

property allow_splitter#
allowing_splitter(splitter)#
property engine_parameters: Dict#
abstract property estimator_: None#
abstract optimize(X: DataFrame, y: Series, splitter: ValidationStrategy | None = None) None#
abstract property optimizer: None#
abstract property transformer_: None#
class palma.base.engine.FlamlOptimizer(problem: str, engine_parameters: dict)#

Bases: BaseOptimizer

Attributes:
allow_splitter
engine_parameters
estimator_
optimizer
transformer_

Methods

allowing_splitter

optimize

property allow_splitter#
property estimator_: BaseEstimator#
optimize(X: DataFrame, y: DataFrame, splitter: ValidationStrategy | None = None) None#
property optimizer: AutoML#
property transformer_: flaml.data.DataTransformer#

palma.base.model module#

class palma.base.model.ModelEvaluation(estimator)#

Bases: object

Attributes:
components
id
unfit_estimator

Methods

add

fit

add(component, name=None)#
property components#
fit(project: Project)#
property id: str#
property unfit_estimator#

palma.base.model_selection module#

class palma.base.model_selection.ModelSelector(engine: str | BaseOptimizer, engine_parameters: Dict)#

Bases: object

Wrapper around optimizers that selects the best model for a Project.

The optimization can be launched with the start method. Once the optimization is done, the best model can be accessed as the best_model_ attribute.

Parameters:
- engine (str): Currently accepted values are “FlamlOptimizer” or “AutoSklearnOptimizer” (the latter is deprecated).

- engine_parameters (dict): parameters passed to the engine.
Attributes:
run_id

Methods

- start(project: Project): Searches for the best model.

property run_id: str#
start(project: Project)#

palma.base.project module#

class palma.base.project.Project(project_name: str, problem: str)#

Bases: object

Represents a machine learning project with various components and logging capabilities.

Parameters:
project_name (str): The name of the project.
problem (str): The type of machine learning problem. Accepted values: “classification” or “regression”.

Attributes:
base_index (List[int]): List of base indices for the project.
components (dict): Dictionary containing project components.
date (datetime): The date and time when the project was created.
project_id (str): Unique identifier for the project.
is_started (bool): Indicates whether the project has been started.
problem (str): Description of the machine learning problem.
validation_strategy (ValidationStrategy): The validation strategy used in the project.
project_name (str): The name of the project.
study_name (str): The randomly generated study name.
X (pd.DataFrame): The feature data for the project.
y (pd.Series): The target variable for the project.

Methods

add(component: Component) -> None: Adds a component to the project.

start(X: pd.DataFrame, y: pd.Series, splitter, X_test=None, y_test=None, groups=None, **kwargs) -> None:

Starts the project with the specified data and validation strategy.

property X: DataFrame#
add(component: Component) None#
property components: dict#
property date: datetime#
property is_started: bool#
property problem: str#
property project_id: str#
property project_name: str#
start(X: DataFrame, y: Series, splitter, X_test=None, y_test=None, groups=None, **kwargs) None#
property study_name: str#
property validation_strategy: ValidationStrategy#
property y: Series#

palma.base.splitting_strategy module#

class palma.base.splitting_strategy.ValidationStrategy(splitter: BaseShuffleSplit | BaseCrossValidator | List[tuple] | List[str], **kwargs)#

Bases: object

Validation strategy for a machine learning project.

Parameters:
- splitter (Union[BaseShuffleSplit, BaseCrossValidator, List[tuple], List[str]]): The data splitting strategy.
Attributes:
- test_index (np.ndarray): Index array for the test set.
- train_index (np.ndarray): Index array for the training set.
- indexes_val (list): List of indexes for validation sets.
- indexes_train_test (list): List containing tuples of training and test indexes.
- id: Unique identifier for the validation strategy.
- splitter: The data splitting strategy.

Methods

- __call__(X: pd.DataFrame, y: pd.Series, X_test: pd.DataFrame = None, y_test: pd.Series = None, groups=None, **kwargs):

Applies the validation strategy to the provided data.

property groups#
property id#
property indexes_train_test: list#
property indexes_val: list#
property splitter#
property test_index: ndarray#
property train_index: ndarray#

Module contents#