WTP space convergence issues in other packages

Given the non-linear utility specification of WTP space models, these models often diverge during estimation and can be highly sensitive to starting parameters. While many other packages support the estimation of WTP space models, in practice these packages often fail to find solutions.

Compared to most other packages, {logitr} tends to perform better and converge more often. It also has a built-in, parallelized multi-start optimization loop for searching for different local minima from different random starting points when minimizing the negative log-likelihood, which improves the odds of converging to a solution for at least some of the multi-start iterations.

This vignette illustrates the convergence issues, comparing the performance of {logitr} to the following R packages that support WTP space models:

For the same mixed logit WTP space model, only {logitr} is able to consistently converge to a solution.

Setup

Basic settings

First load the required packages:

library(logitr)
library(mlogit)
library(gmnl)
library(apollo)
library(mixl)
library(dplyr)
library(tidyr)

set.seed(1234)

Now set the starting parameters for each package as well as the number of draws to use for the simulated log-likelihood:

numDraws_wtp <- 50
start_wtp <- c(
    scalePar        = 1,
    feat            = 0,
    brandhiland     = 0,
    brandweight     = 0,
    brandyoplait    = 0,
    sd_feat         = 0.1,
    sd_brandhiland  = 0.1,
    sd_brandweight  = 0.1,
    sd_brandyoplait = 0.1
)

Take only half of the yogurt data to speed things up:

yogurt <- subset(logitr::yogurt, logitr::yogurt$id <= 50)

Package-specific settings

Both {apollo} and {mixl} require that the user hand-specify the model to be estimated as well as several other settings. These settings are provided here.

Prep for {gmnl}

Convert the yogurt data for into the format required for {gmnl}

data_gmnl <- mlogit.data(
    data     = yogurt,
    shape    = "long",
    choice   = "choice",
    id.var   = 'id',
    alt.var  = 'alt',
    chid.var = 'obsID'
)

Prep for {apollo}

Format the yogurt data to a “wide” format for {apollo}

yogurt_price <- yogurt %>%
    select(id, obsID, price, brand) %>%
    mutate(price = -1*price) %>%
    pivot_wider(
        names_from  = 'brand',
        values_from = 'price') %>%
    rename(
        price_dannon  = dannon,
        price_hiland  = hiland,
        price_weight  = weight,
        price_yoplait = yoplait)
yogurt_feat <- yogurt %>%
    select(id, obsID, feat, brand) %>%
    pivot_wider(
        names_from = 'brand',
        values_from = 'feat') %>%
    rename(
        feat_dannon  = dannon,
        feat_hiland  = hiland,
        feat_weight  = weight,
        feat_yoplait = yoplait)
yogurt_choice <- yogurt %>%
    filter(choice == 1) %>%
    select(id, obsID, choice = alt)
data_apollo <- yogurt_price %>%
    left_join(yogurt_feat, by = c('id', 'obsID')) %>%
    left_join(yogurt_choice, by = c('id', 'obsID')) %>%
    arrange(id, obsID) %>%
    mutate(
      av_dannon  = 1,
      av_hiland  = 1,
      av_weight  = 1,
      av_yoplait = 1
    )

Define the {apollo} probabilities function

apollo_probabilities_wtp <- function(
  apollo_beta, apollo_inputs, functionality = "estimate"
) {

    ### Attach inputs and detach after function exit
    apollo_attach(apollo_beta, apollo_inputs)
    on.exit(apollo_detach(apollo_beta, apollo_inputs))

    ### Create list of probabilities P
    P <- list()

    ### List of utilities: these must use the same names as in mnl_settings,
    #   order is irrelevant
    V <- list()
    V[["dannon"]] <- scalePar * (b_feat * feat_dannon - price_dannon)
    V[["hiland"]] <- scalePar * (b_brandhiland + b_feat * feat_hiland - price_hiland)
    V[["weight"]] <- scalePar * (b_brandweight + b_feat * feat_weight - price_weight)
    V[["yoplait"]] <- scalePar * (b_brandyoplait + b_feat * feat_yoplait - price_yoplait)

    ### Define settings for MNL model component
    mnl_settings <- list(
        alternatives = c(dannon = 1, hiland = 2, weight = 3, yoplait = 4),
        avail = list(
          dannon = av_dannon,
          hiland = av_hiland,
          weight = av_weight,
          yoplait = av_yoplait),
        choiceVar = choice,
        utilities = V
    )

    ### Compute probabilities using MNL model
    P[["model"]] <- apollo_mnl(mnl_settings, functionality)
    ### Take product across observation for same individual
    P <- apollo_panelProd(P, apollo_inputs, functionality)
    ### Average across inter-individual draws
    P = apollo_avgInterDraws(P, apollo_inputs, functionality)
    ### Prepare and return outputs of function
    P <- apollo_prepareProb(P, apollo_inputs, functionality)

    return(P)
}

Define random parameters function

apollo_randCoeff <- function(apollo_beta, apollo_inputs) {
    randcoeff <- list()
    randcoeff[['b_feat']] <- feat + d_feat * sd_feat
    randcoeff[['b_brandhiland']] <- brandhiland + d_brandhiland * sd_brandhiland
    randcoeff[['b_brandweight']] <- brandweight + d_brandweight * sd_brandweight
    randcoeff[['b_brandyoplait']] <- brandyoplait + d_brandyoplait * sd_brandyoplait
    return(randcoeff)
}

Main control settings for {apollo}

apollo_control_wtp <- list(
    modelName       = "MXL_WTP_space",
    modelDescr      = "MXL model on yogurt choice SP data, in WTP space",
    indivID         = "id",
    mixing          = TRUE,
    analyticGrad    = TRUE,
    panelData       = TRUE,
    nCores          = 1
)

# Set parameters for generating draws
apollo_draws_n <- list(
    interDrawsType = "halton",
    interNDraws    = numDraws_wtp,
    interUnifDraws = c(),
    interNormDraws = c(
        "d_feat", "d_brandhiland", "d_brandweight", "d_brandyoplait"),
    intraDrawsType = "halton",
    intraNDraws    = 0,
    intraUnifDraws = c(),
    intraNormDraws = c()
)

# Set input
apollo_inputs_wtp <- apollo_validateInputs(
    apollo_beta      = start_wtp,
    apollo_fixed     = NULL,
    database         = data_apollo,
    apollo_draws     = apollo_draws_n,
    apollo_randCoeff = apollo_randCoeff,
    apollo_control   = apollo_control_wtp
)

Prep for {mixl}

Format the yogurt data to a “wide” format for {mixl}

data_mixl <- data_apollo # Uses the same "wide" format as {apollo}
data_mixl$ID <- data_mixl$id
data_mixl$CHOICE <- data_mixl$choice

Define the {mixl} utility function

mixl_wtp <- "
    feat_RND = @feat + draw_1 * @sd_feat;
    brandhiland_RND = @brandhiland + draw_2 * @sd_brandhiland;
    brandweight_RND = @brandweight + draw_3 * @sd_brandweight;
    brandyoplait_RND = @brandyoplait + draw_4 * @sd_brandyoplait;
    U_1 = @scalePar * (feat_RND * $feat_dannon - $price_dannon);
    U_2 = @scalePar * (brandhiland_RND + feat_RND * $feat_hiland - $price_hiland);
    U_3 = @scalePar * (brandweight_RND + feat_RND * $feat_weight - $price_weight);
    U_4 = @scalePar * (brandyoplait_RND + feat_RND * $feat_yoplait - $price_yoplait);
"
mixl_spec_wtp <- specify_model(mixl_wtp, data_mixl)
availabilities <- generate_default_availabilities(data_mixl, 4)

Estimate WTP space models

The same model is now estimated using all five packages.

{logitr}

model_logitr <- logitr(
    data      = yogurt,
    outcome   = 'choice',
    obsID     = 'obsID',
    panelID   = 'id',
    pars      = c('feat', 'brand'),
    scalePar  = 'price',
    randPars  = c(feat = "n", brand = "n"),
    startVals = start_wtp,
    numDraws  = numDraws_wtp
)

{logitr} converges, even without running a multi-start:

summary(model_logitr)
#> =================================================
#> 
#> Model estimated on: Sat Oct 01 16:46:57 2022 
#> 
#> Using logitr version: 0.8.0 
#> 
#> Call:
#> logitr(data = yogurt, outcome = "choice", obsID = "obsID", pars = c("feat", 
#>     "brand"), scalePar = "price", randPars = c(feat = "n", brand = "n"), 
#>     panelID = "id", startVals = start_wtp, numDraws = numDraws_wtp, 
#>     numCores = numCores)
#> 
#> Frequencies of alternatives:
#>        1        2        3        4 
#> 0.415721 0.029984 0.170989 0.383306 
#> 
#> Exit Status: 3, Optimization stopped because ftol_rel or ftol_abs was reached.
#>                                  
#> Model Type:           Mixed Logit
#> Model Space:   Willingness-to-Pay
#> Model Run:                 1 of 1
#> Iterations:                    85
#> Elapsed Time:            0h:0m:4s
#> Algorithm:         NLOPT_LD_LBFGS
#> Weights Used?:              FALSE
#> Panel ID:                      id
#> Robust?                     FALSE
#> 
#> Model Coefficients: 
#>                   Estimate Std. Error z-value  Pr(>|z|)    
#> scalePar          0.388098   0.050706  7.6538 1.954e-14 ***
#> feat              1.738832   0.725107  2.3980   0.01648 *  
#> brandhiland     -13.536714   1.810701 -7.4760 7.661e-14 ***
#> brandweight      -4.510068   1.015856 -4.4397 9.010e-06 ***
#> brandyoplait      6.663869   0.929658  7.1681 7.605e-13 ***
#> sd_feat           0.491225   1.072013  0.4582   0.64679    
#> sd_brandhiland    5.730571   1.125803  5.0902 3.577e-07 ***
#> sd_brandweight   11.420500   1.727799  6.6099 3.847e-11 ***
#> sd_brandyoplait   8.872470   1.366376  6.4934 8.390e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>                                      
#> Log-Likelihood:          -732.2132421
#> Null Log-Likelihood:    -1710.6872416
#> AIC:                     1482.4264842
#> BIC:                     1528.4886000
#> McFadden R2:                0.5719771
#> Adj McFadden R2:            0.5667161
#> Number of Observations:  1234.0000000
#> 
#> Summary of 10k Draws for Random Coefficients: 
#>              Min.     1st Qu.     Median       Mean   3rd Qu. Max.
#> feat         -Inf   1.4071720   1.738457   1.738145  2.069629  Inf
#> brandhiland  -Inf -17.4039453 -13.538553 -13.542027 -9.674388  Inf
#> brandweight  -Inf -12.2189602  -4.519436  -4.527690  3.182253  Inf
#> brandyoplait -Inf   0.6670773   6.648678   6.641070 12.632059  Inf

Including a multi-start helps build confidence in the solution reached:

model_logitr <- logitr(
    data      = yogurt,
    outcome   = 'choice',
    obsID     = 'obsID',
    panelID   = 'id',
    pars      = c('feat', 'brand'),
    scalePar  = 'price',
    randPars  = c(feat = "n", brand = "n"),
    startVals = start_wtp,
    numDraws  = numDraws_wtp,
    numMultiStarts = 10
)
summary(model_logitr)
#> =================================================
#> 
#> Model estimated on: Sat Oct 01 16:46:57 2022 
#> 
#> Using logitr version: 0.8.0 
#> 
#> Call:
#> logitr(data = yogurt, outcome = "choice", obsID = "obsID", pars = c("feat", 
#>     "brand"), scalePar = "price", randPars = c(feat = "n", brand = "n"), 
#>     panelID = "id", startVals = start_wtp, numDraws = numDraws_wtp, 
#>     numCores = numCores)
#> 
#> Frequencies of alternatives:
#>        1        2        3        4 
#> 0.415721 0.029984 0.170989 0.383306 
#> 
#> Exit Status: 3, Optimization stopped because ftol_rel or ftol_abs was reached.
#>                                  
#> Model Type:           Mixed Logit
#> Model Space:   Willingness-to-Pay
#> Model Run:                 1 of 1
#> Iterations:                    85
#> Elapsed Time:            0h:0m:4s
#> Algorithm:         NLOPT_LD_LBFGS
#> Weights Used?:              FALSE
#> Panel ID:                      id
#> Robust?                     FALSE
#> 
#> Model Coefficients: 
#>                   Estimate Std. Error z-value  Pr(>|z|)    
#> scalePar          0.388098   0.050706  7.6538 1.954e-14 ***
#> feat              1.738832   0.725107  2.3980   0.01648 *  
#> brandhiland     -13.536714   1.810701 -7.4760 7.661e-14 ***
#> brandweight      -4.510068   1.015856 -4.4397 9.010e-06 ***
#> brandyoplait      6.663869   0.929658  7.1681 7.605e-13 ***
#> sd_feat           0.491225   1.072013  0.4582   0.64679    
#> sd_brandhiland    5.730571   1.125803  5.0902 3.577e-07 ***
#> sd_brandweight   11.420500   1.727799  6.6099 3.847e-11 ***
#> sd_brandyoplait   8.872470   1.366376  6.4934 8.390e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>                                      
#> Log-Likelihood:          -732.2132421
#> Null Log-Likelihood:    -1710.6872416
#> AIC:                     1482.4264842
#> BIC:                     1528.4886000
#> McFadden R2:                0.5719771
#> Adj McFadden R2:            0.5667161
#> Number of Observations:  1234.0000000
#> 
#> Summary of 10k Draws for Random Coefficients: 
#>              Min.     1st Qu.     Median       Mean   3rd Qu. Max.
#> feat         -Inf   1.4071720   1.738457   1.738145  2.069629  Inf
#> brandhiland  -Inf -17.4039453 -13.538553 -13.542027 -9.674388  Inf
#> brandweight  -Inf -12.2189602  -4.519436  -4.527690  3.182253  Inf
#> brandyoplait -Inf   0.6670773   6.648678   6.641070 12.632059  Inf

{mixl}

First attempt using same starting points as {logitr}

model_mixl <- estimate(
    mixl_spec_wtp, start_wtp,
    data_mixl, availabilities,
    nDraws = numDraws_wtp
)

{mixl} converges to a local minimum:

c(logLik(model_logitr), logLik(model_mixl))
#> [1]  -732.2132 -1544.8531
cbind(coef(model_logitr), coef(model_mixl))
#>                        [,1]        [,2]
#> scalePar          0.3880976 -2270.55731
#> feat              1.7388324    42.24281
#> brandhiland     -13.5367140    24.36731
#> brandweight      -4.5100684   110.99687
#> brandyoplait      6.6638687  -492.36959
#> sd_feat           0.4912255    10.81470
#> sd_brandhiland    5.7305708    10.23414
#> sd_brandweight   11.4204997   334.92761
#> sd_brandyoplait   8.8724698   915.14924

Second attempt using {logitr} solution as starting points

model_mixl <- estimate(
    mixl_spec_wtp, coef(model_logitr),
    data_mixl, availabilities,
    nDraws = numDraws_wtp
)

Again, {mixl} converges to a local minimum:

c(logLik(model_logitr), logLik(model_mixl))
#> [1]  -732.2132 -1544.8531
cbind(coef(model_logitr), coef(model_mixl))
#>                        [,1]        [,2]
#> scalePar          0.3880976 -2270.55731
#> feat              1.7388324    42.24281
#> brandhiland     -13.5367140    24.36731
#> brandweight      -4.5100684   110.99687
#> brandyoplait      6.6638687  -492.36959
#> sd_feat           0.4912255    10.81470
#> sd_brandhiland    5.7305708    10.23414
#> sd_brandweight   11.4204997   334.92761
#> sd_brandyoplait   8.8724698   915.14924

{gmnl}

First attempt using same starting points as {logitr}. Note that additional starting parameters must be added as the {gmnl} approach to estimating WTP is a slightly different model.

model_gmnl <- gmnl(
    data = data_gmnl,
    formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
    ranp = c(
        feat = "n", brandhiland = "n", brandweight = "n",
        brandyoplait = "n"),
    fixed = c(TRUE, rep(FALSE, 10), TRUE),
    model = "gmnl",
    method = "bfgs",
    haltons = NA,
    panel = TRUE,
    start = c(start_wtp, 0.1, 0.1, 0),
    R = numDraws_wtp
)

{gmnl} converges to a local minimum:

c(logLik(model_logitr), logLik(model_gmnl))
#> [1]  -732.2132 -1710.6872
cbind(coef(model_logitr), coef(model_gmnl))
#>                        [,1]         [,2]
#> feat              0.3880976    8.1540692
#> brandhiland       1.7388324    4.4266391
#> brandweight     -13.5367140   20.9581382
#> brandyoplait     -4.5100684  -93.0969112
#> het.(Intercept)   6.6638687 -440.5183170
#> sd.feat           0.4912255    2.4208585
#> sd.brandhiland    5.7305708    0.1085233
#> sd.brandweight   11.4204997   53.2376327
#> sd.brandyoplait   8.8724698   95.0008933
#> tau               0.3880976  653.9956379

Second attempt using {logitr} solution as starting points:

model_gmnl <- gmnl(
    data = data_gmnl,
    formula = choice ~ price + feat + brand | 0 | 0 | 0 | 1,
    ranp = c(
        feat = "n", brandhiland = "n", brandweight = "n",
        brandyoplait = "n"),
    fixed = c(TRUE, rep(FALSE, 10), TRUE),
    model = "gmnl",
    method = "bfgs",
    haltons = NA,
    panel = TRUE,
    start = c(coef(model_logitr), 0.1, 0.1, 0),
    R = numDraws_wtp
)
#> Error in optim(par = c(feat = 1.74, brandhiland = -13.54, brandweight = -4.51,  :
#> initial value in 'vmmin' is not finite

In this case, {gmnl} fails due to an error with the starting values provided.

{apollo}

First attempt using same starting points as {logitr}:

model_apollo <- apollo_estimate(
    apollo_beta          = start_wtp,
    apollo_fixed         = NULL,
    apollo_probabilities = apollo_probabilities_wtp,
    apollo_inputs        = apollo_inputs_wtp,
    estimate_settings    = list(printLevel = 0)
)

{apollo} converges to a local minimum:

c(logLik(model_logitr), logLik(model_apollo))
#> [1] -732.2132 -776.5525
cbind(coef(model_logitr), coef(model_apollo))
#>                        [,1]          [,2]
#> scalePar          0.3880976    0.02264706
#> feat              1.7388324   52.92881692
#> brandhiland     -13.5367140 -129.27886303
#> brandweight      -4.5100684 -104.76878544
#> brandyoplait      6.6638687   23.77062341
#> sd_feat           0.4912255   11.06432598
#> sd_brandhiland    5.7305708  -86.57222932
#> sd_brandweight   11.4204997  164.38217567
#> sd_brandyoplait   8.8724698  156.45076147

Second attempt using {logitr} solution as starting points:

model_apollo <- apollo_estimate(
    apollo_beta          = coef(model_logitr),
    apollo_fixed         = NULL,
    apollo_probabilities = apollo_probabilities_wtp,
    apollo_inputs        = apollo_inputs_wtp,
    estimate_settings    = list(printLevel = 0)
)

Again, {apollo} converges to a local minimum:

c(logLik(model_logitr), logLik(model_apollo))
#> [1] -732.2132 -772.6268
cbind(coef(model_logitr), coef(model_apollo))
#>                        [,1]          [,2]
#> scalePar          0.3880976  2.180707e-03
#> feat              1.7388324  7.365495e+02
#> brandhiland     -13.5367140 -2.378832e+03
#> brandweight      -4.5100684 -1.382908e+03
#> brandyoplait      6.6638687  1.571897e+02
#> sd_feat           0.4912255 -3.484523e+02
#> sd_brandhiland    5.7305708  9.805109e+02
#> sd_brandweight   11.4204997  1.899130e+03
#> sd_brandyoplait   8.8724698  1.211976e+03