BranchGLM()
allows fitting of gaussian, binomial,
gamma, and poisson GLMs with a variety of links available.grads
argument is for L-BFGS only and it is the
number of gradients that are stored at a time and are used to
approximate the inverse information. The default value for this is 10,
but another common choice is 5.tol
argument controls how strict the convergence
criteria are, lower values of this will lead to more accurate results,
but may also be slower.method
argument is ignored for linear regression
and the OLS solution is used.init
argument.TRUE
and setting the
nthreads
argument to the desired number of threads
used.# Loading in BranchGLM
library(BranchGLM)
# Fitting gaussian regression models for mtcars dataset
<- mtcars
cars
## Identity link
BranchGLM(mpg ~ ., data = cars, family = "gaussian", link = "identity")
#> Results from gaussian regression with identity link function
#> Using the formula mpg ~ .
#>
#> Estimate SE t p.values
#> (Intercept) 12.30000 18.72000 0.6573 0.51810
#> cyl -0.11140 1.04500 -0.1066 0.91610
#> disp 0.01334 0.01786 0.7468 0.46350
#> hp -0.02148 0.02177 -0.9868 0.33500
#> drat 0.78710 1.63500 0.4813 0.63530
#> wt -3.71500 1.89400 -1.9610 0.06325 .
#> qsec 0.82100 0.73080 1.1230 0.27390
#> vs 0.31780 2.10500 0.1510 0.88140
#> am 2.52000 2.05700 1.2250 0.23400
#> gear 0.65540 1.49300 0.4389 0.66520
#> carb -0.19940 0.82880 -0.2406 0.81220
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 7.0235
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 147.49 on 21 degrees of freedom
#> AIC: 166.19
MASS::gamma.dispersion()
function.# Fitting gamma regression models for mtcars dataset
## Inverse link
<- BranchGLM(mpg ~ ., data = cars, family = "gamma", link = "inverse")
GammaFit
GammaFit#> Results from gamma regression with inverse link function
#> Using the formula mpg ~ .
#>
#> Estimate SE t p.values
#> (Intercept) -6.794e-02 3.085e-02 -2.2020 0.03898 *
#> cyl 1.760e-03 1.946e-03 0.9048 0.37590
#> disp -7.947e-06 3.515e-05 -0.2261 0.82330
#> hp -6.724e-05 4.050e-05 -1.6600 0.11170
#> drat -4.283e-04 2.728e-03 -0.1570 0.87670
#> wt -9.224e-03 3.360e-03 -2.7450 0.01214 *
#> qsec 1.739e-03 1.165e-03 1.4930 0.15040
#> vs -3.086e-04 3.322e-03 -0.0929 0.92690
#> am 6.305e-04 3.441e-03 0.1832 0.85640
#> gear 4.882e-03 2.577e-03 1.8940 0.07203 .
#> carb -1.025e-03 1.481e-03 -0.6926 0.49610
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0087
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.28 on 21 degrees of freedom
#> AIC: 152.3
#> Algorithm converged in 3 iterations using Fisher's scoring
## Log link
<- BranchGLM(mpg ~ ., data = cars, family = "gamma", link = "log")
GammaFit
GammaFit#> Results from gamma regression with log link function
#> Using the formula mpg ~ .
#>
#> Estimate SE t p.values
#> (Intercept) 2.754e+00 6.934e-01 3.9720 0.0006942 ***
#> cyl 7.509e-03 3.871e-02 0.1940 0.8481000
#> disp 6.521e-05 6.615e-04 0.0986 0.9224000
#> hp -8.898e-04 8.064e-04 -1.1030 0.2823000
#> drat 2.366e-02 6.058e-02 0.3906 0.7000000
#> wt -1.655e-01 7.017e-02 -2.3590 0.0281100 *
#> qsec 3.055e-02 2.707e-02 1.1290 0.2718000
#> vs -1.141e-03 7.796e-02 -0.0146 0.9885000
#> am 5.421e-02 7.618e-02 0.7115 0.4846000
#> gear 6.014e-02 5.531e-02 1.0870 0.2893000
#> carb -2.277e-02 3.070e-02 -0.7418 0.4664000
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 0.0096
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 0.31 on 21 degrees of freedom
#> AIC: 155.65
#> Algorithm converged in 2 iterations using Fisher's scoring
# Fitting poisson regression models for warpbreaks dataset
<- warpbreaks
warp
## Log link
BranchGLM(breaks ~ ., data = warp, family = "poisson", link = "log")
#> Results from poisson regression with log link function
#> Using the formula breaks ~ .
#>
#> Estimate SE z p.values
#> (Intercept) 3.69200 0.04541 81.300 < 2.2e-16 ***
#> woolB -0.20600 0.05157 -3.994 6.490e-05 ***
#> tensionM -0.32130 0.06027 -5.332 9.729e-08 ***
#> tensionH -0.51850 0.06396 -8.107 5.209e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 1
#> 54 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 210.39 on 50 degrees of freedom
#> AIC: 493.06
#> Algorithm converged in 6 iterations using Fisher's scoring
# Fitting binomial regression models for toothgrowth dataset
<- ToothGrowth
Data
## Logit link
BranchGLM(supp ~ ., data = Data, family = "binomial", link = "logit")
#> Results from binomial regression with logit link function
#> Using the formula supp ~ .
#>
#> Estimate SE z p.values
#> (Intercept) 1.5380 0.7860 1.956 0.050440 .
#> len -0.2127 0.0728 -2.921 0.003484 **
#> dose 2.0890 0.8497 2.458 0.013970 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 1
#> 60 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 72.33 on 57 degrees of freedom
#> AIC: 78.33
#> Algorithm converged in 4 iterations using Fisher's scoring
## Probit link
BranchGLM(supp ~ ., data = Data, family = "binomial", link = "probit")
#> Results from binomial regression with probit link function
#> Using the formula supp ~ .
#>
#> Estimate SE z p.values
#> (Intercept) 0.94020 0.46900 2.005 0.045000 *
#> len -0.13180 0.04195 -3.142 0.001678 **
#> dose 1.30800 0.49710 2.631 0.008502 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Dispersion parameter taken to be 1
#> 60 observations used to fit model
#> (0 observations removed due to missingness)
#>
#> Residual Deviance: 72.19 on 57 degrees of freedom
#> AIC: 78.19
#> Algorithm converged in 5 iterations using Fisher's scoring
Table()
creates a confusion matrix based on the
predicted classes and observed classesROC()
creates an ROC curve which can be plotted with
plot()
AUC()
and Cindex()
calculate the area
under the ROC curveMultipleROCCurves()
allows for the plotting of multiple
ROC curves on the same plot# Fitting logistic regression model for toothgrowth dataset
<- BranchGLM(supp ~ ., data = Data, family = "binomial", link = "logit")
catFit
Table(catFit)
#> Confusion matrix:
#> ----------------------
#> Predicted
#> OJ VC
#>
#> OJ 17 13
#> Observed
#> VC 7 23
#>
#> ----------------------
#> Measures:
#> ----------------------
#> Accuracy: 0.6667
#> Sensitivity: 0.7667
#> Specificity: 0.5667
#> PPV: 0.6389
# Creating ROC curve
<- ROC(catFit)
catROC
plot(catROC, main = "ROC Curve", col = "indianred")
# Getting Cindex/AUC
Cindex(catFit)
#> [1] 0.7127778
AUC(catFit)
#> [1] 0.7127778
# Showing ROC plots for logit, probit, and cloglog
<- BranchGLM(supp ~ . ,data = Data, family = "binomial",
probitFit link = "probit")
<- BranchGLM(supp ~ . ,data = Data, family = "binomial",
cloglogFit link = "cloglog")
MultipleROCCurves(catROC, ROC(probitFit), ROC(cloglogFit),
names = c("Logistic ROC", "Probit ROC", "Cloglog ROC"))
BranchGLM
object.
<- predict(catFit)
preds
Table(preds, Data$supp)
#> Confusion matrix:
#> ----------------------
#> Predicted
#> OJ VC
#>
#> OJ 17 13
#> Observed
#> VC 7 23
#>
#> ----------------------
#> Measures:
#> ----------------------
#> Accuracy: 0.6667
#> Sensitivity: 0.7667
#> Specificity: 0.5667
#> PPV: 0.6389
AUC(preds, Data$supp)
#> [1] 0.7127778
ROC(preds, Data$supp) |> plot(main = "ROC Curve", col = "deepskyblue")
coef()
to extract the coefficientslogLik()
to extract the log likelihoodAIC()
to extract the AICBIC()
to extract the BICpredict()
to obtain predictions from the fitted
modelcoefficients
slot of the fitted
model# Predict method
predict(GammaFit)
#> [1] 21.69333 21.15566 25.92103 20.27496 17.15124 19.83386 14.55730 22.26169
#> [9] 23.89767 18.75674 19.10374 15.10160 16.07372 16.13726 12.20297 11.70443
#> [17] 11.60706 27.90334 30.14896 30.14451 23.41000 17.02230 17.63782 13.90043
#> [25] 16.06923 28.65170 26.61054 28.59460 17.89013 19.53099 14.11845 23.30350
# Accessing coefficients matrix
$coefficients
GammaFit#> Estimate SE t p.values
#> (Intercept) 2.754211e+00 0.6933540659 3.97230023 0.00069416
#> cyl 7.509180e-03 0.0387101010 0.19398501 0.84805180
#> disp 6.521183e-05 0.0006614834 0.09858423 0.92240336
#> hp -8.897889e-04 0.0008063589 -1.10346503 0.28231007
#> drat 2.366270e-02 0.0605780301 0.39061524 0.70001605
#> wt -1.655137e-01 0.0701735211 -2.35863431 0.02811042
#> qsec 3.055119e-02 0.0270721947 1.12850802 0.27183333
#> vs -1.140739e-03 0.0779559038 -0.01463313 0.98846300
#> am 5.420526e-02 0.0761831300 0.71151268 0.48459603
#> gear 6.013892e-02 0.0553138294 1.08723110 0.28925667
#> carb -2.277255e-02 0.0306989241 -0.74180288 0.46642281