| Title: | R6-Based ML Learners for 'mlexperiments' |
|---|---|
| Description: | Enhances 'mlexperiments' <https://CRAN.R-project.org/package=mlexperiments> with additional machine learning ('ML') learners. The package provides R6-based learners for the following algorithms: 'glmnet' <https://CRAN.R-project.org/package=glmnet>, 'ranger' <https://CRAN.R-project.org/package=ranger>, 'xgboost' <https://CRAN.R-project.org/package=xgboost>, and 'lightgbm' <https://CRAN.R-project.org/package=lightgbm>. These can be used directly with the 'mlexperiments' R package. |
| Authors: | Lorenz A. Kapsner [cre, aut, cph] (ORCID: <https://orcid.org/0000-0003-1866-860X>) |
| Maintainer: | Lorenz A. Kapsner <[email protected]> |
| License: | GPL (>= 3) |
| Version: | 0.0.8 |
| Built: | 2026-05-18 09:36:39 UTC |
| Source: | https://github.com/kapsner/mllrnrs |
The LearnerGlmnet class is the interface to the glmnet R package for use
with the mlexperiments package.
Optimization metric:
LearnerGlmnet can be used with, e.g., mlexperiments::MLCrossValidation (as in the example below).
mlexperiments::MLLearnerBase -> LearnerGlmnet
new()
Create a new LearnerGlmnet object.
LearnerGlmnet$new(metric_optimization_higher_better)
metric_optimization_higher_better: A logical. Defines the direction of the optimization metric used throughout the hyperparameter optimization.
A new LearnerGlmnet R6 object.
if (requireNamespace("glmnet", quietly = TRUE)) {
LearnerGlmnet$new(metric_optimization_higher_better = FALSE)
}
clone()
The objects of this class are cloneable with this method.
LearnerGlmnet$clone(deep = FALSE)
deep: Whether to make a deep clone.
glmnet::glmnet(), glmnet::cv.glmnet()
# binary classification if (requireNamespace("glmnet", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) glmnet_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerGlmnet$new( metric_optimization_higher_better = FALSE ), fold_list = fold_list, ncores = 2, seed = 123 ) glmnet_cv$learner_args <- list( alpha = 1, lambda = 0.1, family = "binomial", type.measure = "class", standardize = TRUE ) glmnet_cv$predict_args <- list(type = "response") glmnet_cv$performance_metric_args <- list(positive = "1", negative = "0") glmnet_cv$performance_metric <- mlexperiments::metric("AUC") # set data glmnet_cv$set_data( x = train_x, y = train_y ) glmnet_cv$execute() } ## ------------------------------------------------ ## Method `LearnerGlmnet$new` ## ------------------------------------------------ if (requireNamespace("glmnet", quietly = TRUE)) { LearnerGlmnet$new(metric_optimization_higher_better = FALSE) }# binary classification if (requireNamespace("glmnet", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) glmnet_cv <- 
mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerGlmnet$new( metric_optimization_higher_better = FALSE ), fold_list = fold_list, ncores = 2, seed = 123 ) glmnet_cv$learner_args <- list( alpha = 1, lambda = 0.1, family = "binomial", type.measure = "class", standardize = TRUE ) glmnet_cv$predict_args <- list(type = "response") glmnet_cv$performance_metric_args <- list(positive = "1", negative = "0") glmnet_cv$performance_metric <- mlexperiments::metric("AUC") # set data glmnet_cv$set_data( x = train_x, y = train_y ) glmnet_cv$execute() } ## ------------------------------------------------ ## Method `LearnerGlmnet$new` ## ------------------------------------------------ if (requireNamespace("glmnet", quietly = TRUE)) { LearnerGlmnet$new(metric_optimization_higher_better = FALSE) }
The LearnerLightgbm class is the interface to the lightgbm R package for
use with the mlexperiments package.
Optimization metric: needs to be specified with the learner parameter
'metric'. The following options can be set via options():
"mlexperiments.optim.lgb.nrounds" (default: 5000L)
"mlexperiments.optim.lgb.early_stopping_rounds" (default: 500L)
"mlexperiments.lgb.print_every_n" (default: 50L)
"mlexperiments.lgb.verbose" (default: -1L)
LearnerLightgbm can be used with, e.g., mlexperiments::MLCrossValidation (as in the example below).
mlexperiments::MLLearnerBase -> LearnerLightgbm
new()
Create a new LearnerLightgbm object.
LearnerLightgbm$new(metric_optimization_higher_better)
metric_optimization_higher_better: A logical. Defines the direction of the optimization metric used throughout the hyperparameter optimization.
A new LearnerLightgbm R6 object.
if (requireNamespace("lightgbm", quietly = TRUE)) {
LearnerLightgbm$new(metric_optimization_higher_better = FALSE)
}
clone()
The objects of this class are cloneable with this method.
LearnerLightgbm$clone(deep = FALSE)
deep: Whether to make a deep clone.
lightgbm::lgb.train(), lightgbm::lgb.cv()
# binary classification if (requireNamespace("lightgbm", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] param_list_lightgbm <- expand.grid( bagging_fraction = seq(0.6, 1, .2), feature_fraction = seq(0.6, 1, .2), min_data_in_leaf = seq(10, 50, 10), learning_rate = seq(0.1, 0.2, 0.1), num_leaves = seq(10, 50, 10), max_depth = -1L ) train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) lightgbm_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerLightgbm$new( metric_optimization_higher_better = FALSE ), fold_list = fold_list, ncores = 2, seed = 123 ) lightgbm_cv$learner_args <- c( as.list( data.table::data.table( param_list_lightgbm[37, ], stringsAsFactors = FALSE ), ), list( objective = "binary", metric = "binary_logloss" ), nrounds = 45L ) lightgbm_cv$performance_metric_args <- list(positive = "1", negative = "0") lightgbm_cv$performance_metric <- mlexperiments::metric("AUC") # set data lightgbm_cv$set_data( x = train_x, y = train_y ) lightgbm_cv$execute() } ## ------------------------------------------------ ## Method `LearnerLightgbm$new` ## ------------------------------------------------ if (requireNamespace("lightgbm", quietly = TRUE)) { LearnerLightgbm$new(metric_optimization_higher_better = FALSE) }# binary classification if (requireNamespace("lightgbm", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] 
param_list_lightgbm <- expand.grid( bagging_fraction = seq(0.6, 1, .2), feature_fraction = seq(0.6, 1, .2), min_data_in_leaf = seq(10, 50, 10), learning_rate = seq(0.1, 0.2, 0.1), num_leaves = seq(10, 50, 10), max_depth = -1L ) train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) lightgbm_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerLightgbm$new( metric_optimization_higher_better = FALSE ), fold_list = fold_list, ncores = 2, seed = 123 ) lightgbm_cv$learner_args <- c( as.list( data.table::data.table( param_list_lightgbm[37, ], stringsAsFactors = FALSE ), ), list( objective = "binary", metric = "binary_logloss" ), nrounds = 45L ) lightgbm_cv$performance_metric_args <- list(positive = "1", negative = "0") lightgbm_cv$performance_metric <- mlexperiments::metric("AUC") # set data lightgbm_cv$set_data( x = train_x, y = train_y ) lightgbm_cv$execute() } ## ------------------------------------------------ ## Method `LearnerLightgbm$new` ## ------------------------------------------------ if (requireNamespace("lightgbm", quietly = TRUE)) { LearnerLightgbm$new(metric_optimization_higher_better = FALSE) }
The LearnerRanger class is the interface to the ranger R package for use
with the mlexperiments package.
Optimization metric:
classification: classification error rate
regression: mean squared error
LearnerRanger can be used with, e.g., mlexperiments::MLCrossValidation (as in the example below).
mlexperiments::MLLearnerBase -> LearnerRanger
new()
Create a new LearnerRanger object.
LearnerRanger$new()
A new LearnerRanger R6 object.
if (requireNamespace("ranger", quietly = TRUE)) {
LearnerRanger$new()
}
clone()
The objects of this class are cloneable with this method.
LearnerRanger$clone(deep = FALSE)
deep: Whether to make a deep clone.
# binary classification if (requireNamespace("ranger", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] param_list_ranger <- expand.grid( num.trees = seq(500, 1000, 500), mtry = seq(2, 6, 2), min.node.size = seq(1, 9, 4), max.depth = seq(1, 9, 4), sample.fraction = seq(0.5, 0.8, 0.3) ) train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) ranger_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerRanger$new(), fold_list = fold_list, ncores = 2, seed = 123 ) ranger_cv$learner_args <- c( as.list( data.table::data.table( param_list_ranger[37, ], stringsAsFactors = FALSE ), ), list(classification = TRUE) ) ranger_cv$performance_metric_args <- list(positive = "1", negative = "0") ranger_cv$performance_metric <- mlexperiments::metric("AUC") # set data ranger_cv$set_data( x = train_x, y = train_y ) ranger_cv$execute() } ## ------------------------------------------------ ## Method `LearnerRanger$new` ## ------------------------------------------------ if (requireNamespace("ranger", quietly = TRUE)) { LearnerRanger$new() }# binary classification if (requireNamespace("ranger", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] param_list_ranger <- expand.grid( num.trees = seq(500, 1000, 500), mtry = seq(2, 6, 2), min.node.size = seq(1, 9, 4), max.depth = seq(1, 9, 4), sample.fraction = seq(0.5, 0.8, 0.3) ) train_x <- 
model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) ranger_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerRanger$new(), fold_list = fold_list, ncores = 2, seed = 123 ) ranger_cv$learner_args <- c( as.list( data.table::data.table( param_list_ranger[37, ], stringsAsFactors = FALSE ), ), list(classification = TRUE) ) ranger_cv$performance_metric_args <- list(positive = "1", negative = "0") ranger_cv$performance_metric <- mlexperiments::metric("AUC") # set data ranger_cv$set_data( x = train_x, y = train_y ) ranger_cv$execute() } ## ------------------------------------------------ ## Method `LearnerRanger$new` ## ------------------------------------------------ if (requireNamespace("ranger", quietly = TRUE)) { LearnerRanger$new() }
The LearnerXgboost class is the interface to the xgboost R package for
use with the mlexperiments package.
Optimization metric: needs to be specified with the learner parameter
'eval_metric'. The following options can be set via options():
"mlexperiments.optim.xgb.nrounds" (default: 5000L)
"mlexperiments.optim.xgb.early_stopping_rounds" (default: 500L)
"mlexperiments.xgb.print_every_n" (default: 50L)
"mlexperiments.xgb.verbose" (default: FALSE)
LearnerXgboost can be used with, e.g., mlexperiments::MLCrossValidation (as in the example below).
mlexperiments::MLLearnerBase -> LearnerXgboost
new()
Create a new LearnerXgboost object.
LearnerXgboost$new(metric_optimization_higher_better)
metric_optimization_higher_better: A logical. Defines the direction of the optimization metric used throughout the hyperparameter optimization.
A new LearnerXgboost R6 object.
if (requireNamespace("xgboost", quietly = TRUE)) {
LearnerXgboost$new(metric_optimization_higher_better = FALSE)
}
clone()
The objects of this class are cloneable with this method.
LearnerXgboost$clone(deep = FALSE)
deep: Whether to make a deep clone.
xgboost::xgb.train(), xgboost::xgb.cv()
if (requireNamespace("xgboost", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { # binary classification Sys.setenv("OMP_THREAD_LIMIT" = 2) library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 123 feature_cols <- colnames(dataset)[1:8] param_list_xgboost <- expand.grid( subsample = seq(0.6, 1, .2), colsample_bytree = seq(0.6, 1, .2), min_child_weight = seq(1, 5, 4), learning_rate = seq(0.1, 0.2, 0.1), max_depth = seq(1, 5, 4), nthread = 2 ) train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) xgboost_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerXgboost$new( metric_optimization_higher_better = FALSE ), fold_list = fold_list, ncores = 2L, seed = 123 ) xgboost_cv$learner_args <- c( as.list( data.table::data.table( param_list_xgboost[37, ], stringsAsFactors = FALSE ), ), list( objective = "binary:logistic", eval_metric = "logloss" ), nrounds = 45L ) xgboost_cv$performance_metric_args <- list(positive = "1", negative = "0") xgboost_cv$performance_metric <- mlexperiments::metric("AUC") # set data xgboost_cv$set_data( x = train_x, y = train_y ) xgboost_cv$execute() } ## ------------------------------------------------ ## Method `LearnerXgboost$new` ## ------------------------------------------------ if (requireNamespace("xgboost", quietly = TRUE)) { LearnerXgboost$new(metric_optimization_higher_better = FALSE) }if (requireNamespace("xgboost", quietly = TRUE) && requireNamespace("mlbench", quietly = TRUE) && requireNamespace("measures", quietly = TRUE)) { # binary classification Sys.setenv("OMP_THREAD_LIMIT" = 2) library(mlbench) data("PimaIndiansDiabetes2") dataset <- PimaIndiansDiabetes2 |> data.table::as.data.table() |> na.omit() seed <- 
123 feature_cols <- colnames(dataset)[1:8] param_list_xgboost <- expand.grid( subsample = seq(0.6, 1, .2), colsample_bytree = seq(0.6, 1, .2), min_child_weight = seq(1, 5, 4), learning_rate = seq(0.1, 0.2, 0.1), max_depth = seq(1, 5, 4), nthread = 2 ) train_x <- model.matrix( ~ -1 + ., dataset[, .SD, .SDcols = feature_cols] ) train_y <- as.integer(dataset[, get("diabetes")]) - 1L fold_list <- splitTools::create_folds( y = train_y, k = 3, type = "stratified", seed = seed ) xgboost_cv <- mlexperiments::MLCrossValidation$new( learner = mllrnrs::LearnerXgboost$new( metric_optimization_higher_better = FALSE ), fold_list = fold_list, ncores = 2L, seed = 123 ) xgboost_cv$learner_args <- c( as.list( data.table::data.table( param_list_xgboost[37, ], stringsAsFactors = FALSE ), ), list( objective = "binary:logistic", eval_metric = "logloss" ), nrounds = 45L ) xgboost_cv$performance_metric_args <- list(positive = "1", negative = "0") xgboost_cv$performance_metric <- mlexperiments::metric("AUC") # set data xgboost_cv$set_data( x = train_x, y = train_y ) xgboost_cv$execute() } ## ------------------------------------------------ ## Method `LearnerXgboost$new` ## ------------------------------------------------ if (requireNamespace("xgboost", quietly = TRUE)) { LearnerXgboost$new(metric_optimization_higher_better = FALSE) }