mlr3oml enables downloading OpenML datasets, tasks, flows, runs,
and collections, and converting them to the corresponding mlr3 objects. Furthermore, you can
also obtain the data and the resampling for a given OpenML task. Caching
can be enabled by setting the option "mlr3oml.cache".
Uploading to OpenML is currently not supported; use the OpenML package
for this.
library("mlr3")
library("mlr3oml")
# be less verbose
lgr::get_logger("mlr3oml")$set_threshold("warn")
# retrieve data set as task from OML
tsk("oml", data_id = 31)
## <TaskClassif:credit-g> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
## - fct (13): checking_status, credit_history, employment,
## foreign_worker, housing, job, other_parties, other_payment_plans,
## own_telephone, personal_status, property_magnitude, purpose,
## savings_status
## - int (7): age, credit_amount, duration, existing_credits,
## installment_commitment, num_dependents, residence_since
# retrieve a regular task from OML
tsk("oml", task_id = 59)
## <TaskClassif:iris> (150 x 5)
## * Target: class
## * Properties: multiclass
## * Features (4):
## - dbl (4): petallength, petalwidth, sepallength, sepalwidth
# retrieve resampling from OML
rsmp("oml", task_id = 59)
## <ResamplingCustom>: Custom Splits
## * Iterations: 10
## * Instantiated: TRUE
## * Parameters: list()
# R6 class for data sets
odata = OMLData$new(61) # or
odata = odt(61)
odata$name
## [1] "iris"
odata$nrow
## [1] 150
odata$ncol
## [1] 5
head(odata$data)
## sepallength sepalwidth petallength petalwidth class
## 1: 5.1 3.5 1.4 0.2 Iris-setosa
## 2: 4.9 3.0 1.4 0.2 Iris-setosa
## 3: 4.7 3.2 1.3 0.2 Iris-setosa
## 4: 4.6 3.1 1.5 0.2 Iris-setosa
## 5: 5.0 3.6 1.4 0.2 Iris-setosa
## 6: 5.4 3.9 1.7 0.4 Iris-setosa
backend = as_data_backend(odata)
backend
## <DataBackendDataTable> (150x6)
## sepallength sepalwidth petallength petalwidth class ..row_id
## 5.1 3.5 1.4 0.2 Iris-setosa 1
## 4.9 3.0 1.4 0.2 Iris-setosa 2
## 4.7 3.2 1.3 0.2 Iris-setosa 3
## 4.6 3.1 1.5 0.2 Iris-setosa 4
## 5.0 3.6 1.4 0.2 Iris-setosa 5
## 5.4 3.9 1.7 0.4 Iris-setosa 6
## [...] (144 rows omitted)
# list oml data sets with 5 features and 50 - 200 instances
tab = list_oml_data(number_features = 5, number_instances = c(50, 200))
head(tab[, .(data_id, name)])
## data_id name
## 1: 61 iris
## 2: 199 fruitfly
## 3: 214 baskball
## 4: 329 hayes-roth
## 5: 346 aids
## 6: 668 witmer_census_1980
# R6 class for tasks
otask = OMLTask$new(31) # or
otask = otsk(31)
task = as_task(otask)
task
## <TaskClassif:credit-g> (1000 x 21)
## * Target: class
## * Properties: twoclass
## * Features (20):
## - fct (13): checking_status, credit_history, employment,
## foreign_worker, housing, job, other_parties, other_payment_plans,
## own_telephone, personal_status, property_magnitude, purpose,
## savings_status
## - int (7): age, credit_amount, duration, existing_credits,
## installment_commitment, num_dependents, residence_since
resampling = as_resampling(otask)
resampling
## <ResamplingCustom>: Custom Splits
## * Iterations: 10
## * Instantiated: TRUE
## * Parameters: list()
otask$data
## <OMLData:31:credit-g> (1000x21)
## * Default target: class
otask$name
## [1] "Task 31: credit-g (Supervised Classification)"
otask$nrow
## [1] 1000
otask$ncol
## [1] 21
otask$task
## NULL
otask$resampling
## NULL
# list first 10 oml tasks
tab = list_oml_tasks(limit = 10)
tab[, .(task_id, data_id, name)]
## task_id data_id name
## 1: 2 2 anneal
## 2: 3 3 kr-vs-kp
## 3: 4 4 labor
## 4: 5 5 arrhythmia
## 5: 6 6 letter
## 6: 7 7 audiology
## 7: 8 8 liver-disorders
## 8: 9 9 autos
## 9: 10 10 lymph
## 10: 11 11 balance-scale
# R6 class for flows
oflow = OMLFlow$new(100) # or
oflow = oflw(100)
oflow$dependencies
## [1] "Weka_3.7.5"
oflow$parameter
## name data_type default_value
## 1: A flag
## 2: B flag
## 3: C option 0.25
## 4: J flag
## 5: L flag
## 6: M option 2
## 7: N option
## 8: O flag
## 9: Q option
## 10: R flag
## 11: S flag
## 12: U flag
# non-executable pseudo learner
learner = as_learner(oflow, "regr")
learner
## <LearnerRegrOML100:oml.100>
## * Model: -
## * Parameters: list()
## * Packages: mlr3
## * Predict Types: [response]
## * Feature Types: -
## * Properties: -
# R6Class for run
orun = OMLRun$new(538858) # or
orun = orn(538858)
orun$data
## <OMLData:952:prnn_fglass> (214x10)
## * Default target: type
orun$task
## <OMLTask:3815>
## * Type: Supervised Classification
## * Data: prnn_fglass (id: 952; dim: 214x10)
## * Target: type
## * Estimation: crossvalidation (id: 1; repeats: 1, folds: 10)
orun$flow
## <OMLFlow:3364>
## * Name: classif.boosting
## * Dependencies: mlr_2.8, adabag_4.1, rpart_4.1.10
head(orun$prediction)
## repeat. fold row_id prediction truth
## 1: 0 0 33 window_float_glass window_float_glass
## 2: 0 0 52 window_float_glass window_float_glass
## 3: 0 0 67 window_float_glass window_float_glass
## 4: 0 0 49 window_float_glass window_float_glass
## 5: 0 0 41 window_float_glass window_float_glass
## 6: 0 0 5 window_non-float_glass window_float_glass
## confidence.containers confidence.tableware confidence.vehicle_glass
## 1: 0 0 0
## 2: 0 0 0
## 3: 0 0 0
## 4: 0 0 0
## 5: 0 0 0
## 6: 0 0 0
## confidence.vehicle_headlamp_glass confidence.window_float_glass
## 1: 0 1
## 2: 0 1
## 3: 0 1
## 4: 0 1
## 5: 0 1
## 6: 0 0
## confidence.window_non.float_glass
## 1: 0
## 2: 0
## 3: 0
## 4: 0
## 5: 0
## 6: 1
rr = as_resample_result(orun)
rr
## <ResampleResult> of 10 iterations
## * Task: prnn_fglass
## * Learner: oml.3364
## * Warnings: 0 in 0 iterations
## * Errors: 0 in 0 iterations
# R6 class for collection
ocol = OMLCollection$new(232) # or
ocol = ocl(232)
ocol$tasks
## id task data task_type target nrow ncol
## 1: 3 <OMLTask[25]> kr-vs-kp Supervised Classification class 3196 37
## 2: 6 <OMLTask[25]> letter Supervised Classification class 20000 17
## missing numeric symbolic binary task_splits
## 1: 0 0 37 35 crossvalidation
## 2: 0 16 1 0 crossvalidation