Title: R6-Based ML Learners for 'mlexperiments'
Description: Enhances 'mlexperiments' <https://CRAN.R-project.org/package=mlexperiments> with additional machine learning ('ML') learners. The package provides R6-based learners for the following algorithms: 'glmnet' <https://CRAN.R-project.org/package=glmnet>, 'ranger' <https://CRAN.R-project.org/package=ranger>, 'xgboost' <https://CRAN.R-project.org/package=xgboost>, and 'lightgbm' <https://CRAN.R-project.org/package=lightgbm>. These can be used directly with the 'mlexperiments' R package.
Authors: Lorenz A. Kapsner [cre, aut, cph]
Maintainer: Lorenz A. Kapsner <[email protected]>
License: GPL (>= 3)
Version: 0.0.4
Built: 2024-11-02 04:44:04 UTC
Source: https://github.com/kapsner/mllrnrs
The LearnerGlmnet class is the interface to the glmnet R package for use with the mlexperiments package.

Optimization metric: needs to be specified with the learner parameter type.measure.

Can be used with the experiment classes of the mlexperiments package, e.g., mlexperiments::MLCrossValidation (see the example below).

Super classes: mlexperiments::MLLearnerBase -> LearnerGlmnet
new()

Create a new LearnerGlmnet object.

Usage:
LearnerGlmnet$new(metric_optimization_higher_better)

Arguments:
metric_optimization_higher_better: A logical. Defines the direction of the optimization metric used throughout the hyperparameter optimization.

Returns:
A new LearnerGlmnet R6 object.

Examples:
LearnerGlmnet$new(metric_optimization_higher_better = FALSE)
clone()

The objects of this class are cloneable with this method.

Usage:
LearnerGlmnet$clone(deep = FALSE)

Arguments:
deep: Whether to make a deep clone.
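Cloning creates an independent copy of a learner object. A minimal sketch using standard R6 semantics; with deep = TRUE, fields that are themselves R6 objects are copied as well:

learner <- mllrnrs::LearnerGlmnet$new(
  metric_optimization_higher_better = FALSE
)
# modifying the copy does not affect the original learner
learner_copy <- learner$clone(deep = TRUE)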
See also: glmnet::glmnet(), glmnet::cv.glmnet()
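The learner_args in the example below map directly onto arguments of glmnet::glmnet(). For orientation, a minimal standalone sketch of the equivalent direct call, assuming train_x (a numeric model matrix) and train_y (a 0/1 integer vector) as prepared in the example:

# direct glmnet fit with the arguments the learner forwards
model <- glmnet::glmnet(
  x = train_x,
  y = train_y,
  family = "binomial",
  alpha = 1,
  lambda = 0.1,
  standardize = TRUE
)
# predictions on the response scale, mirroring predict_args
preds <- predict(model, newx = train_x, type = "response")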
Examples:

# binary classification
library(mlbench)
data("PimaIndiansDiabetes2")
dataset <- PimaIndiansDiabetes2 |>
  data.table::as.data.table() |>
  na.omit()

seed <- 123
feature_cols <- colnames(dataset)[1:8]

train_x <- model.matrix(
  ~ -1 + .,
  dataset[, .SD, .SDcols = feature_cols]
)
train_y <- as.integer(dataset[, get("diabetes")]) - 1L

fold_list <- splitTools::create_folds(
  y = train_y,
  k = 3,
  type = "stratified",
  seed = seed
)

glmnet_cv <- mlexperiments::MLCrossValidation$new(
  learner = mllrnrs::LearnerGlmnet$new(
    metric_optimization_higher_better = FALSE
  ),
  fold_list = fold_list,
  ncores = 2,
  seed = 123
)
glmnet_cv$learner_args <- list(
  alpha = 1,
  lambda = 0.1,
  family = "binomial",
  type.measure = "class",
  standardize = TRUE
)
glmnet_cv$predict_args <- list(type = "response")
glmnet_cv$performance_metric_args <- list(positive = "1")
glmnet_cv$performance_metric <- mlexperiments::metric("auc")

# set data
glmnet_cv$set_data(
  x = train_x,
  y = train_y
)

glmnet_cv$execute()

## ------------------------------------------------
## Method `LearnerGlmnet$new`
## ------------------------------------------------

LearnerGlmnet$new(metric_optimization_higher_better = FALSE)
The LearnerLightgbm class is the interface to the lightgbm R package for use with the mlexperiments package.
Optimization metric: needs to be specified with the learner parameter metric. The following options can be set via options() (see the sketch after this list):

- "mlexperiments.optim.lgb.nrounds" (default: 5000L)
- "mlexperiments.optim.lgb.early_stopping_rounds" (default: 500L)
- "mlexperiments.lgb.print_every_n" (default: 50L)
- "mlexperiments.lgb.verbose" (default: -1L)
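These defaults can be overridden before running an experiment. A minimal sketch (the replacement values are arbitrary):

# shorten tuning runs and make lightgbm more talkative
options(
  "mlexperiments.optim.lgb.nrounds" = 1000L,
  "mlexperiments.optim.lgb.early_stopping_rounds" = 100L,
  "mlexperiments.lgb.print_every_n" = 10L,
  "mlexperiments.lgb.verbose" = 1L
)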
LearnerLightgbm can be used with the experiment classes of the mlexperiments package, e.g., mlexperiments::MLCrossValidation (see the example below).

Super classes: mlexperiments::MLLearnerBase -> LearnerLightgbm
new()

Create a new LearnerLightgbm object.

Usage:
LearnerLightgbm$new(metric_optimization_higher_better)

Arguments:
metric_optimization_higher_better: A logical. Defines the direction of the optimization metric used throughout the hyperparameter optimization.

Returns:
A new LearnerLightgbm R6 object.

Examples:
LearnerLightgbm$new(metric_optimization_higher_better = FALSE)
clone()

The objects of this class are cloneable with this method.

Usage:
LearnerLightgbm$clone(deep = FALSE)

Arguments:
deep: Whether to make a deep clone.
See also: lightgbm::lgb.train(), lightgbm::lgb.cv()
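For orientation, a minimal standalone sketch of the underlying lightgbm training call, assuming train_x and train_y as prepared in the example below; the parameter values mirror the example's learner_args:

# build the lightgbm dataset and train with the same objective and metric
dtrain <- lightgbm::lgb.Dataset(data = train_x, label = train_y)
model <- lightgbm::lgb.train(
  params = list(
    objective = "binary",
    metric = "binary_logloss",
    learning_rate = 0.1,
    max_depth = -1L
  ),
  data = dtrain,
  nrounds = 45L
)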
Examples:

# binary classification
library(mlbench)
data("PimaIndiansDiabetes2")
dataset <- PimaIndiansDiabetes2 |>
  data.table::as.data.table() |>
  na.omit()

seed <- 123
feature_cols <- colnames(dataset)[1:8]

param_list_lightgbm <- expand.grid(
  bagging_fraction = seq(0.6, 1, .2),
  feature_fraction = seq(0.6, 1, .2),
  min_data_in_leaf = seq(10, 50, 10),
  learning_rate = seq(0.1, 0.2, 0.1),
  num_leaves = seq(10, 50, 10),
  max_depth = -1L
)

train_x <- model.matrix(
  ~ -1 + .,
  dataset[, .SD, .SDcols = feature_cols]
)
train_y <- as.integer(dataset[, get("diabetes")]) - 1L

fold_list <- splitTools::create_folds(
  y = train_y,
  k = 3,
  type = "stratified",
  seed = seed
)

lightgbm_cv <- mlexperiments::MLCrossValidation$new(
  learner = mllrnrs::LearnerLightgbm$new(
    metric_optimization_higher_better = FALSE
  ),
  fold_list = fold_list,
  ncores = 2,
  seed = 123
)
lightgbm_cv$learner_args <- c(
  as.list(
    data.table::data.table(
      param_list_lightgbm[37, ],
      stringsAsFactors = FALSE
    )
  ),
  list(
    objective = "binary",
    metric = "binary_logloss"
  ),
  nrounds = 45L
)
lightgbm_cv$performance_metric_args <- list(positive = "1")
lightgbm_cv$performance_metric <- mlexperiments::metric("auc")

# set data
lightgbm_cv$set_data(
  x = train_x,
  y = train_y
)

lightgbm_cv$execute()

## ------------------------------------------------
## Method `LearnerLightgbm$new`
## ------------------------------------------------

LearnerLightgbm$new(metric_optimization_higher_better = FALSE)
The LearnerRanger class is the interface to the ranger R package for use with the mlexperiments package.
Optimization metric:

- classification: classification error rate
- regression: mean squared error

Can be used with the experiment classes of the mlexperiments package, e.g., mlexperiments::MLCrossValidation (see the example below).

Super classes: mlexperiments::MLLearnerBase -> LearnerRanger
new()

Create a new LearnerRanger object.

Usage:
LearnerRanger$new()

Returns:
A new LearnerRanger R6 object.

Examples:
LearnerRanger$new()
clone()

The objects of this class are cloneable with this method.

Usage:
LearnerRanger$clone(deep = FALSE)

Arguments:
deep: Whether to make a deep clone.
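The tuned parameters map onto arguments of ranger::ranger(). For orientation, a minimal standalone sketch of a direct fit with one row of the parameter grid, assuming train_x and train_y as prepared in the example below:

# direct ranger fit; classification = TRUE forces classification
# on the 0/1 integer target
model <- ranger::ranger(
  x = train_x,
  y = train_y,
  num.trees = 500,
  mtry = 2,
  min.node.size = 5,
  max.depth = 5,
  sample.fraction = 0.8,
  classification = TRUE
)
preds <- predict(model, data = train_x)$predictions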
Examples:

# binary classification
library(mlbench)
data("PimaIndiansDiabetes2")
dataset <- PimaIndiansDiabetes2 |>
  data.table::as.data.table() |>
  na.omit()

seed <- 123
feature_cols <- colnames(dataset)[1:8]

param_list_ranger <- expand.grid(
  num.trees = seq(500, 1000, 500),
  mtry = seq(2, 6, 2),
  min.node.size = seq(1, 9, 4),
  max.depth = seq(1, 9, 4),
  sample.fraction = seq(0.5, 0.8, 0.3)
)

train_x <- model.matrix(
  ~ -1 + .,
  dataset[, .SD, .SDcols = feature_cols]
)
train_y <- as.integer(dataset[, get("diabetes")]) - 1L

fold_list <- splitTools::create_folds(
  y = train_y,
  k = 3,
  type = "stratified",
  seed = seed
)

ranger_cv <- mlexperiments::MLCrossValidation$new(
  learner = mllrnrs::LearnerRanger$new(),
  fold_list = fold_list,
  ncores = 2,
  seed = 123
)
ranger_cv$learner_args <- c(
  as.list(
    data.table::data.table(
      param_list_ranger[37, ],
      stringsAsFactors = FALSE
    )
  ),
  list(classification = TRUE)
)
ranger_cv$performance_metric_args <- list(positive = "1")
ranger_cv$performance_metric <- mlexperiments::metric("auc")

# set data
ranger_cv$set_data(
  x = train_x,
  y = train_y
)

ranger_cv$execute()

## ------------------------------------------------
## Method `LearnerRanger$new`
## ------------------------------------------------

LearnerRanger$new()
The LearnerXgboost class is the interface to the xgboost R package for use with the mlexperiments package.
Optimization metric: needs to be specified with the learner parameter eval_metric. The following options can be set via options() (see the sketch after this list):

- "mlexperiments.optim.xgb.nrounds" (default: 5000L)
- "mlexperiments.optim.xgb.early_stopping_rounds" (default: 500L)
- "mlexperiments.xgb.print_every_n" (default: 50L)
- "mlexperiments.xgb.verbose" (default: FALSE)
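As with the lightgbm learner, these defaults can be overridden before running an experiment. A minimal sketch (the replacement values are arbitrary):

# shorten tuning runs and enable xgboost's progress output
options(
  "mlexperiments.optim.xgb.nrounds" = 1000L,
  "mlexperiments.optim.xgb.early_stopping_rounds" = 100L,
  "mlexperiments.xgb.print_every_n" = 10L,
  "mlexperiments.xgb.verbose" = TRUE
)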
LearnerXgboost can be used with the experiment classes of the mlexperiments package, e.g., mlexperiments::MLCrossValidation (see the example below).

Super classes: mlexperiments::MLLearnerBase -> LearnerXgboost
new()

Create a new LearnerXgboost object.

Usage:
LearnerXgboost$new(metric_optimization_higher_better)

Arguments:
metric_optimization_higher_better: A logical. Defines the direction of the optimization metric used throughout the hyperparameter optimization.

Returns:
A new LearnerXgboost R6 object.

Examples:
LearnerXgboost$new(metric_optimization_higher_better = FALSE)
clone()

The objects of this class are cloneable with this method.

Usage:
LearnerXgboost$clone(deep = FALSE)

Arguments:
deep: Whether to make a deep clone.
See also: xgboost::xgb.train(), xgboost::xgb.cv()
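For orientation, a minimal standalone sketch of the underlying xgboost training call, assuming train_x and train_y as prepared in the example below; the parameter values mirror one row of the example's parameter grid:

# build the DMatrix and train with the same objective and eval metric
dtrain <- xgboost::xgb.DMatrix(data = train_x, label = train_y)
model <- xgboost::xgb.train(
  params = list(
    objective = "binary:logistic",
    eval_metric = "logloss",
    max_depth = 5,
    learning_rate = 0.1,
    subsample = 0.8,
    colsample_bytree = 0.8,
    min_child_weight = 1
  ),
  data = dtrain,
  nrounds = 45L
)
preds <- predict(model, newdata = dtrain)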
Examples:

# binary classification
library(mlbench)
data("PimaIndiansDiabetes2")
dataset <- PimaIndiansDiabetes2 |>
  data.table::as.data.table() |>
  na.omit()

seed <- 123
feature_cols <- colnames(dataset)[1:8]

param_list_xgboost <- expand.grid(
  subsample = seq(0.6, 1, .2),
  colsample_bytree = seq(0.6, 1, .2),
  min_child_weight = seq(1, 5, 4),
  learning_rate = seq(0.1, 0.2, 0.1),
  max_depth = seq(1, 5, 4)
)

train_x <- model.matrix(
  ~ -1 + .,
  dataset[, .SD, .SDcols = feature_cols]
)
train_y <- as.integer(dataset[, get("diabetes")]) - 1L

fold_list <- splitTools::create_folds(
  y = train_y,
  k = 3,
  type = "stratified",
  seed = seed
)

xgboost_cv <- mlexperiments::MLCrossValidation$new(
  learner = mllrnrs::LearnerXgboost$new(
    metric_optimization_higher_better = FALSE
  ),
  fold_list = fold_list,
  ncores = 2,
  seed = 123
)
xgboost_cv$learner_args <- c(
  as.list(
    data.table::data.table(
      param_list_xgboost[37, ],
      stringsAsFactors = FALSE
    )
  ),
  list(
    objective = "binary:logistic",
    eval_metric = "logloss"
  ),
  nrounds = 45L
)
xgboost_cv$performance_metric_args <- list(positive = "1")
xgboost_cv$performance_metric <- mlexperiments::metric("auc")

# set data
xgboost_cv$set_data(
  x = train_x,
  y = train_y
)

xgboost_cv$execute()

## ------------------------------------------------
## Method `LearnerXgboost$new`
## ------------------------------------------------

LearnerXgboost$new(metric_optimization_higher_better = FALSE)