Forward selection, backward elimination, and branch and bound selection can be done using `VariableSelection()`.
`VariableSelection()` can accept either a `BranchGLM` object or a formula along with the data and the desired family and link to perform the variable selection.
`VariableSelection()` returns some information about the search; more detailed information about the best models can be seen by using the `summary()` function.
`VariableSelection()` will properly handle interaction terms and categorical variables.
`keep` can also be specified if any set of variables is desired to be kept in every model.

# Loading BranchGLM package
library(BranchGLM)
# Fitting gamma regression model
cars <- mtcars
# Fitting gamma regression with inverse link
GammaFit <- BranchGLM(mpg ~ ., data = cars, family = "gamma", link = "inverse")
# Forward selection with mtcars
forwardVS <- VariableSelection(GammaFit, type = "forward")
forwardVS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using forward selection with AIC
#> The best value of AIC obtained was 142.2
#> Number of models fit: 27
#>
#> Order the variables were added to the model:
#>
#> 1). wt
#> 2). hp
## Getting final model
fit(summary(forwardVS), which = 1)
#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ hp + wt
#>
#> Estimate SE t p.values
#> (Intercept) -8.923e-03 2.806e-03 -3.180 0.0034910 **
#> hp -8.887e-05 2.110e-05 -4.212 0.0002245 ***
#> wt -9.826e-03 1.384e-03 -7.100 8.21e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0104
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.33 on 29 degrees of freedom
#> AIC: 142.2
#> Algorithm converged in 3 iterations using Fisher's scoring
# Backward elimination with mtcars
backwardVS <- VariableSelection(GammaFit, type = "backward")
backwardVS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using backward elimination with AIC
#> The best value of AIC obtained was 141.9
#> Number of models fit: 49
#>
#> Order the variables were removed from the model:
#>
#> 1). vs
#> 2). drat
#> 3). am
#> 4). disp
#> 5). carb
#> 6). cyl
## Getting final model
fit(summary(backwardVS), which = 1)
#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ hp + wt + qsec + gear
#>
#> Estimate SE t p.values
#> (Intercept) -4.691e-02 1.782e-02 -2.633 0.01384 *
#> hp -6.284e-05 3.015e-05 -2.084 0.04675 *
#> wt -9.485e-03 1.819e-03 -5.213 1.719e-05 ***
#> qsec 1.299e-03 7.471e-04 1.739 0.09348 .
#> gear 2.662e-03 1.652e-03 1.612 0.11870
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0091
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.29 on 27 degrees of freedom
#> AIC: 141.9
#> Algorithm converged in 3 iterations using Fisher's scoring
If `showprogress` is true, then progress of the branch and bound algorithm will be reported occasionally.

# Branch and bound with mtcars
VS <- VariableSelection(GammaFit, type = "branch and bound", showprogress = FALSE)
VS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using branch and bound selection with AIC
#> The best value of AIC obtained was 141.9
#> Number of models fit: 56
## Getting final model
fit(summary(VS), which = 1)
#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ hp + wt + qsec + gear
#>
#> Estimate SE t p.values
#> (Intercept) -4.691e-02 1.782e-02 -2.633 0.01384 *
#> hp -6.284e-05 3.015e-05 -2.084 0.04675 *
#> wt -9.485e-03 1.819e-03 -5.213 1.719e-05 ***
#> qsec 1.299e-03 7.471e-04 1.739 0.09348 .
#> gear 2.662e-03 1.652e-03 1.612 0.11870
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0091
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.29 on 27 degrees of freedom
#> AIC: 141.9
#> Algorithm converged in 3 iterations using Fisher's scoring
A formula along with the data can also be used instead of supplying a `BranchGLM` object.

# Can also use a formula and data
formulaVS <- VariableSelection(mpg ~ ., data = cars, family = "gamma",
                               link = "inverse", type = "branch and bound",
                               showprogress = FALSE, metric = "AIC")
formulaVS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using branch and bound selection with AIC
#> The best value of AIC obtained was 141.9
#> Number of models fit: 56
## Getting final model
fit(summary(formulaVS), which = 1)
#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ hp + wt + qsec + gear
#>
#> Estimate SE t p.values
#> (Intercept) -4.691e-02 1.782e-02 -2.633 0.01384 *
#> hp -6.284e-05 3.015e-05 -2.084 0.04675 *
#> wt -9.485e-03 1.819e-03 -5.213 1.719e-05 ***
#> qsec 1.299e-03 7.471e-04 1.739 0.09348 .
#> gear 2.662e-03 1.652e-03 1.612 0.11870
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0091
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.29 on 27 degrees of freedom
#> AIC: 141.9
#> Algorithm converged in 3 iterations using Fisher's scoring
# Finding top 10 models
formulaVS <- VariableSelection(mpg ~ ., data = cars, family = "gamma",
                               link = "inverse", type = "branch and bound",
                               showprogress = FALSE, metric = "AIC",
                               bestmodels = 10)
formulaVS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using branch and bound selection with AIC
#> Found the top 10 models
#> The range of AIC values for the top 10 models is (141.9, 143.59)
#> Number of models fit: 116
## Getting summary and plotting results
formulasumm <- summary(formulaVS)
plot(formulasumm, type = "b")
plot(formulasumm, ptype = "variables")
## Getting best model
fit(formulasumm, which = 1)
#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ hp + wt + qsec + gear
#>
#> Estimate SE t p.values
#> (Intercept) -4.691e-02 1.782e-02 -2.633 0.01384 *
#> hp -6.284e-05 3.015e-05 -2.084 0.04675 *
#> wt -9.485e-03 1.819e-03 -5.213 1.719e-05 ***
#> qsec 1.299e-03 7.471e-04 1.739 0.09348 .
#> gear 2.662e-03 1.652e-03 1.612 0.11870
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0091
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.29 on 27 degrees of freedom
#> AIC: 141.9
#> Algorithm converged in 3 iterations using Fisher's scoring
# Finding all models with an AIC within 2 of the best model
formulaVS <- VariableSelection(mpg ~ ., data = cars, family = "gamma",
                               link = "inverse", type = "branch and bound",
                               showprogress = FALSE, metric = "AIC",
                               cutoff = 2)
formulaVS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using branch and bound selection with AIC
#> Found the top 16 models
#> The range of AIC values for the top 16 models is (141.9, 143.9)
#> Number of models fit: 110
## Getting summary and plotting results
formulasumm <- summary(formulaVS)
plot(formulasumm, type = "b")
plot(formulasumm, ptype = "variables")
Specifying variables via `keep` will ensure that those variables are kept through the selection process.

# Example of using keep
keepVS <- VariableSelection(mpg ~ ., data = cars, family = "gamma",
                            link = "inverse", type = "branch and bound",
                            keep = c("hp", "cyl"), metric = "AIC",
                            showprogress = FALSE, bestmodels = 10)
keepVS
#> Variable Selection Info:
#> ------------------------
#> Variables were selected using branch and bound selection with AIC
#> Found the top 10 models
#> The range of AIC values for the top 10 models is (143.17, 145.24)
#> Number of models fit: 50
## Getting summary and plotting results
keepsumm <- summary(keepVS)
plot(keepsumm, type = "b")
plot(keepsumm, ptype = "variables")
## Getting final model
fit(keepsumm, which = 1)
#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ cyl + hp + wt + qsec + gear
#>
#> Estimate SE t p.values
#> (Intercept) -6.464e-02 2.700e-02 -2.3940 0.02418 *
#> cyl 1.412e-03 1.642e-03 0.8603 0.39750
#> hp -7.523e-05 3.321e-05 -2.2650 0.03205 *
#> wt -1.037e-02 2.082e-03 -4.9830 3.517e-05 ***
#> qsec 1.816e-03 9.462e-04 1.9190 0.06603 .
#> gear 3.861e-03 2.155e-03 1.7910 0.08490 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0089
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.29 on 26 degrees of freedom
#> AIC: 143.17
#> Algorithm converged in 3 iterations using Fisher's scoring