Given the non-linear utility specification of WTP space models, these models often diverge during estimation and can be highly sensitive to starting parameters. While many other packages support the estimation of WTP space models, in practice these packages often fail to find solutions.
Compared to most other packages, {logitr} tends to perform better and converge more often. It also has a built-in, parallelized multi-start optimization loop for searching for different local minima from different random starting points when minimizing the negative log-likelihood, which improves the odds of converging to a solution for at least some of the multi-start iterations.
This vignette illustrates the convergence issues, comparing the performance of {logitr} to the following R packages that support WTP space models:
For the same mixed logit WTP space model, only {logitr} is able to consistently converge to a solution.
First load the required packages:
library(logitr)
library(mlogit)
library(gmnl)
library(apollo)
library(mixl)
library(dplyr)
library(tidyr)
set.seed(1234)
Now set the starting parameters for each package as well as the number of draws to use for the simulated log-likelihood:
<- 50
numDraws_wtp <- c(
start_wtp scalePar = 1,
feat = 0,
brandhiland = 0,
brandweight = 0,
brandyoplait = 0,
sd_feat = 0.1,
sd_brandhiland = 0.1,
sd_brandweight = 0.1,
sd_brandyoplait = 0.1
)
Take only half of the yogurt data to speed things up:
<- subset(logitr::yogurt, logitr::yogurt$id <= 50) yogurt
Both {apollo} and {mixl} require that the user hand-specify the model to be estimated as well as several other settings. These settings are provided here.
Convert the yogurt data for into the format required for {gmnl}
<- mlogit.data(
data_gmnl data = yogurt,
shape = "long",
choice = "choice",
id.var = 'id',
alt.var = 'alt',
chid.var = 'obsID'
)
Format the yogurt
data to a “wide” format for
{apollo}
<- yogurt %>%
yogurt_price select(id, obsID, price, brand) %>%
mutate(price = -1*price) %>%
pivot_wider(
names_from = 'brand',
values_from = 'price') %>%
rename(
price_dannon = dannon,
price_hiland = hiland,
price_weight = weight,
price_yoplait = yoplait)
<- yogurt %>%
yogurt_feat select(id, obsID, feat, brand) %>%
pivot_wider(
names_from = 'brand',
values_from = 'feat') %>%
rename(
feat_dannon = dannon,
feat_hiland = hiland,
feat_weight = weight,
feat_yoplait = yoplait)
<- yogurt %>%
yogurt_choice filter(choice == 1) %>%
select(id, obsID, choice = alt)
<- yogurt_price %>%
data_apollo left_join(yogurt_feat, by = c('id', 'obsID')) %>%
left_join(yogurt_choice, by = c('id', 'obsID')) %>%
arrange(id, obsID) %>%
mutate(
av_dannon = 1,
av_hiland = 1,
av_weight = 1,
av_yoplait = 1
)
Define the {apollo} probabilities function
<- function(
apollo_probabilities_wtp functionality = "estimate"
apollo_beta, apollo_inputs,
) {
### Attach inputs and detach after function exit
apollo_attach(apollo_beta, apollo_inputs)
on.exit(apollo_detach(apollo_beta, apollo_inputs))
### Create list of probabilities P
<- list()
P
### List of utilities: these must use the same names as in mnl_settings,
# order is irrelevant
<- list()
V "dannon"]] <- scalePar * (b_feat * feat_dannon - price_dannon)
V[["hiland"]] <- scalePar * (b_brandhiland + b_feat * feat_hiland - price_hiland)
V[["weight"]] <- scalePar * (b_brandweight + b_feat * feat_weight - price_weight)
V[["yoplait"]] <- scalePar * (b_brandyoplait + b_feat * feat_yoplait - price_yoplait)
V[[
### Define settings for MNL model component
<- list(
mnl_settings alternatives = c(dannon = 1, hiland = 2, weight = 3, yoplait = 4),
avail = list(
dannon = av_dannon,
hiland = av_hiland,
weight = av_weight,
yoplait = av_yoplait),
choiceVar = choice,
utilities = V
)
### Compute probabilities using MNL model
"model"]] <- apollo_mnl(mnl_settings, functionality)
P[[### Take product across observation for same individual
<- apollo_panelProd(P, apollo_inputs, functionality)
P ### Average across inter-individual draws
= apollo_avgInterDraws(P, apollo_inputs, functionality)
P ### Prepare and return outputs of function
<- apollo_prepareProb(P, apollo_inputs, functionality)
P
return(P)
}
Define random parameters function
<- function(apollo_beta, apollo_inputs) {
apollo_randCoeff <- list()
randcoeff 'b_feat']] <- feat + d_feat * sd_feat
randcoeff[['b_brandhiland']] <- brandhiland + d_brandhiland * sd_brandhiland
randcoeff[['b_brandweight']] <- brandweight + d_brandweight * sd_brandweight
randcoeff[['b_brandyoplait']] <- brandyoplait + d_brandyoplait * sd_brandyoplait
randcoeff[[return(randcoeff)
}
Main control settings for {apollo}
<- list(
apollo_control_wtp modelName = "MXL_WTP_space",
modelDescr = "MXL model on yogurt choice SP data, in WTP space",
indivID = "id",
mixing = TRUE,
analyticGrad = TRUE,
panelData = TRUE,
nCores = 1
)
# Set parameters for generating draws
<- list(
apollo_draws_n interDrawsType = "halton",
interNDraws = numDraws_wtp,
interUnifDraws = c(),
interNormDraws = c(
"d_feat", "d_brandhiland", "d_brandweight", "d_brandyoplait"),
intraDrawsType = "halton",
intraNDraws = 0,
intraUnifDraws = c(),
intraNormDraws = c()
)
# Set input
<- apollo_validateInputs(
apollo_inputs_wtp apollo_beta = start_wtp,
apollo_fixed = NULL,
database = data_apollo,
apollo_draws = apollo_draws_n,
apollo_randCoeff = apollo_randCoeff,
apollo_control = apollo_control_wtp
)
Format the yogurt
data to a “wide” format for {mixl}
<- data_apollo # Uses the same "wide" format as {apollo}
data_mixl $ID <- data_mixl$id
data_mixl$CHOICE <- data_mixl$choice data_mixl
Define the {mixl} utility function
<- "
mixl_wtp feat_RND = @feat + draw_1 * @sd_feat;
brandhiland_RND = @brandhiland + draw_2 * @sd_brandhiland;
brandweight_RND = @brandweight + draw_3 * @sd_brandweight;
brandyoplait_RND = @brandyoplait + draw_4 * @sd_brandyoplait;
U_1 = @scalePar * (feat_RND * $feat_dannon - $price_dannon);
U_2 = @scalePar * (brandhiland_RND + feat_RND * $feat_hiland - $price_hiland);
U_3 = @scalePar * (brandweight_RND + feat_RND * $feat_weight - $price_weight);
U_4 = @scalePar * (brandyoplait_RND + feat_RND * $feat_yoplait - $price_yoplait);
"
<- specify_model(mixl_wtp, data_mixl)
mixl_spec_wtp <- generate_default_availabilities(data_mixl, 4) availabilities
The same model is now estimated using all five packages.
<- logitr(
model_logitr data = yogurt,
outcome = 'choice',
obsID = 'obsID',
panelID = 'id',
pars = c('feat', 'brand'),
scalePar = 'price',
randPars = c(feat = "n", brand = "n"),
startVals = start_wtp,
numDraws = numDraws_wtp
)
{logitr} converges, even without running a multi-start:
summary(model_logitr)
#> =================================================
#>
#> Model estimated on: Sat Oct 01 16:46:57 2022
#>
#> Using logitr version: 0.8.0
#>
#> Call:
#> logitr(data = yogurt, outcome = "choice", obsID = "obsID", pars = c("feat",
#> "brand"), scalePar = "price", randPars = c(feat = "n", brand = "n"),
#> panelID = "id", startVals = start_wtp, numDraws = numDraws_wtp,
#> numCores = numCores)
#>
#> Frequencies of alternatives:
#> 1 2 3 4
#> 0.415721 0.029984 0.170989 0.383306
#>
#> Exit Status: 3, Optimization stopped because ftol_rel or ftol_abs was reached.
#>
#> Model Type: Mixed Logit
#> Model Space: Willingness-to-Pay
#> Model Run: 1 of 1
#> Iterations: 85
#> Elapsed Time: 0h:0m:4s
#> Algorithm: NLOPT_LD_LBFGS
#> Weights Used?: FALSE
#> Panel ID: id
#> Robust? FALSE
#>
#> Model Coefficients:
#> Estimate Std. Error z-value Pr(>|z|)
#> scalePar 0.388098 0.050706 7.6538 1.954e-14 ***
#> feat 1.738832 0.725107 2.3980 0.01648 *
#> brandhiland -13.536714 1.810701 -7.4760 7.661e-14 ***
#> brandweight -4.510068 1.015856 -4.4397 9.010e-06 ***
#> brandyoplait 6.663869 0.929658 7.1681 7.605e-13 ***
#> sd_feat 0.491225 1.072013 0.4582 0.64679
#> sd_brandhiland 5.730571 1.125803 5.0902 3.577e-07 ***
#> sd_brandweight 11.420500 1.727799 6.6099 3.847e-11 ***
#> sd_brandyoplait 8.872470 1.366376 6.4934 8.390e-11 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Log-Likelihood: -732.2132421
#> Null Log-Likelihood: -1710.6872416
#> AIC: 1482.4264842
#> BIC: 1528.4886000
#> McFadden R2: 0.5719771
#> Adj McFadden R2: 0.5667161
#> Number of Observations: 1234.0000000
#>
#> Summary of 10k Draws for Random Coefficients:
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> feat -Inf 1.4071720 1.738457 1.738145 2.069629 Inf
#> brandhiland -Inf -17.4039453 -13.538553 -13.542027 -9.674388 Inf
#> brandweight -Inf -12.2189602 -4.519436 -4.527690 3.182253 Inf
#> brandyoplait -Inf 0.6670773 6.648678 6.641070 12.632059 Inf
Including a multi-start helps build confidence in the solution reached:
<- logitr(
model_logitr data = yogurt,
outcome = 'choice',
obsID = 'obsID',
panelID = 'id',
pars = c('feat', 'brand'),
scalePar = 'price',
randPars = c(feat = "n", brand = "n"),
startVals = start_wtp,
numDraws = numDraws_wtp,
numMultiStarts = 10
)
summary(model_logitr)
#> =================================================
#>
#> Model estimated on: Sat Oct 01 16:46:57 2022
#>
#> Using logitr version: 0.8.0
#>
#> Call:
#> logitr(data = yogurt, outcome = "choice", obsID = "obsID", pars = c("feat",
#> "brand"), scalePar = "price", randPars = c(feat = "n", brand = "n"),
#> panelID = "id", startVals = start_wtp, numDraws = numDraws_wtp,
#> numCores = numCores)
#>
#> Frequencies of alternatives:
#> 1 2 3 4
#> 0.415721 0.029984 0.170989 0.383306
#>
#> Exit Status: 3, Optimization stopped because ftol_rel or ftol_abs was reached.
#>
#> Model Type: Mixed Logit
#> Model Space: Willingness-to-Pay
#> Model Run: 1 of 1
#> Iterations: 85
#> Elapsed Time: 0h:0m:4s
#> Algorithm: NLOPT_LD_LBFGS
#> Weights Used?: FALSE
#> Panel ID: id
#> Robust? FALSE
#>
#> Model Coefficients:
#> Estimate Std. Error z-value Pr(>|z|)
#> scalePar 0.388098 0.050706 7.6538 1.954e-14 ***
#> feat 1.738832 0.725107 2.3980 0.01648 *
#> brandhiland -13.536714 1.810701 -7.4760 7.661e-14 ***
#> brandweight -4.510068 1.015856 -4.4397 9.010e-06 ***
#> brandyoplait 6.663869 0.929658 7.1681 7.605e-13 ***
#> sd_feat 0.491225 1.072013 0.4582 0.64679
#> sd_brandhiland 5.730571 1.125803 5.0902 3.577e-07 ***
#> sd_brandweight 11.420500 1.727799 6.6099 3.847e-11 ***
#> sd_brandyoplait 8.872470 1.366376 6.4934 8.390e-11 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Log-Likelihood: -732.2132421
#> Null Log-Likelihood: -1710.6872416
#> AIC: 1482.4264842
#> BIC: 1528.4886000
#> McFadden R2: 0.5719771
#> Adj McFadden R2: 0.5667161
#> Number of Observations: 1234.0000000
#>
#> Summary of 10k Draws for Random Coefficients:
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> feat -Inf 1.4071720 1.738457 1.738145 2.069629 Inf
#> brandhiland -Inf -17.4039453 -13.538553 -13.542027 -9.674388 Inf
#> brandweight -Inf -12.2189602 -4.519436 -4.527690 3.182253 Inf
#> brandyoplait -Inf 0.6670773 6.648678 6.641070 12.632059 Inf
First attempt using same starting points as {logitr}
<- estimate(
model_mixl
mixl_spec_wtp, start_wtp,
data_mixl, availabilities,nDraws = numDraws_wtp
)
{mixl} converges to a local minimum:
c(logLik(model_logitr), logLik(model_mixl))
#> [1] -732.2132 -1544.8531
cbind(coef(model_logitr), coef(model_mixl))
#> [,1] [,2]
#> scalePar 0.3880976 -2270.55731
#> feat 1.7388324 42.24281
#> brandhiland -13.5367140 24.36731
#> brandweight -4.5100684 110.99687
#> brandyoplait 6.6638687 -492.36959
#> sd_feat 0.4912255 10.81470
#> sd_brandhiland 5.7305708 10.23414
#> sd_brandweight 11.4204997 334.92761
#> sd_brandyoplait 8.8724698 915.14924
Second attempt using {logitr} solution as starting points
<- estimate(
model_mixl coef(model_logitr),
mixl_spec_wtp,
data_mixl, availabilities,nDraws = numDraws_wtp
)
Again, {mixl} converges to a local minimum:
c(logLik(model_logitr), logLik(model_mixl))
#> [1] -732.2132 -1544.8531
cbind(coef(model_logitr), coef(model_mixl))
#> [,1] [,2]
#> scalePar 0.3880976 -2270.55731
#> feat 1.7388324 42.24281
#> brandhiland -13.5367140 24.36731
#> brandweight -4.5100684 110.99687
#> brandyoplait 6.6638687 -492.36959
#> sd_feat 0.4912255 10.81470
#> sd_brandhiland 5.7305708 10.23414
#> sd_brandweight 11.4204997 334.92761
#> sd_brandyoplait 8.8724698 915.14924
First attempt using same starting points as {logitr}. Note that additional starting parameters must be added as the {gmnl} approach to estimating WTP is a slightly different model.
<- gmnl(
model_gmnl data = data_gmnl,
formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
ranp = c(
feat = "n", brandhiland = "n", brandweight = "n",
brandyoplait = "n"),
fixed = c(TRUE, rep(FALSE, 10), TRUE),
model = "gmnl",
method = "bfgs",
haltons = NA,
panel = TRUE,
start = c(start_wtp, 0.1, 0.1, 0),
R = numDraws_wtp
)
{gmnl} converges to a local minimum:
c(logLik(model_logitr), logLik(model_gmnl))
#> [1] -732.2132 -1710.6872
cbind(coef(model_logitr), coef(model_gmnl))
#> [,1] [,2]
#> feat 0.3880976 8.1540692
#> brandhiland 1.7388324 4.4266391
#> brandweight -13.5367140 20.9581382
#> brandyoplait -4.5100684 -93.0969112
#> het.(Intercept) 6.6638687 -440.5183170
#> sd.feat 0.4912255 2.4208585
#> sd.brandhiland 5.7305708 0.1085233
#> sd.brandweight 11.4204997 53.2376327
#> sd.brandyoplait 8.8724698 95.0008933
#> tau 0.3880976 653.9956379
Second attempt using {logitr} solution as starting points:
<- gmnl(
model_gmnl data = data_gmnl,
formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
ranp = c(
feat = "n", brandhiland = "n", brandweight = "n",
brandyoplait = "n"),
fixed = c(TRUE, rep(FALSE, 10), TRUE),
model = "gmnl",
method = "bfgs",
haltons = NA,
panel = TRUE,
start = c(coef(model_logitr), 0.1, 0.1, 0),
R = numDraws_wtp
)
#> Error in optim(par = c(feat = 1.74, brandhiland = -13.54, brandweight = -4.51, :
#> initial value in 'vmmin' is not finite
In this case, {gmnl} fails due to an error with the starting values provided.
First attempt using same starting points as {logitr}:
<- apollo_estimate(
model_apollo apollo_beta = start_wtp,
apollo_fixed = NULL,
apollo_probabilities = apollo_probabilities_wtp,
apollo_inputs = apollo_inputs_wtp,
estimate_settings = list(printLevel = 0)
)
{apollo} converges to a local minimum:
c(logLik(model_logitr), logLik(model_apollo))
#> [1] -732.2132 -776.5525
cbind(coef(model_logitr), coef(model_apollo))
#> [,1] [,2]
#> scalePar 0.3880976 0.02264706
#> feat 1.7388324 52.92881692
#> brandhiland -13.5367140 -129.27886303
#> brandweight -4.5100684 -104.76878544
#> brandyoplait 6.6638687 23.77062341
#> sd_feat 0.4912255 11.06432598
#> sd_brandhiland 5.7305708 -86.57222932
#> sd_brandweight 11.4204997 164.38217567
#> sd_brandyoplait 8.8724698 156.45076147
Second attempt using {logitr} solution as starting points:
<- apollo_estimate(
model_apollo apollo_beta = coef(model_logitr),
apollo_fixed = NULL,
apollo_probabilities = apollo_probabilities_wtp,
apollo_inputs = apollo_inputs_wtp,
estimate_settings = list(printLevel = 0)
)
Again, {apollo} converges to a local minimum:
c(logLik(model_logitr), logLik(model_apollo))
#> [1] -732.2132 -772.6268
cbind(coef(model_logitr), coef(model_apollo))
#> [,1] [,2]
#> scalePar 0.3880976 2.180707e-03
#> feat 1.7388324 7.365495e+02
#> brandhiland -13.5367140 -2.378832e+03
#> brandweight -4.5100684 -1.382908e+03
#> brandyoplait 6.6638687 1.571897e+02
#> sd_feat 0.4912255 -3.484523e+02
#> sd_brandhiland 5.7305708 9.805109e+02
#> sd_brandweight 11.4204997 1.899130e+03
#> sd_brandyoplait 8.8724698 1.211976e+03