require(copula)
<- FALSE doExtras
This vignette visualizes (log) likelihood functions of Archimedean copulas, some of which are numerically challenging to compute. Because of this computational challenge, we also check for equivalence of some of the several computational methods, testing for numerical near-equality using all.equal(L1, L2)
.
We start by defining the following auxiliary functions.
##' @title [m]inus Log-Likelihood for Archimedean Copulas ("fast version")
##' @param theta parameter (length 1 for our current families)
##' @param acop Archimedean copula (of class "acopula")
##' @param u data matrix n x d
##' @param n.MC if > 0 MC is applied with sample size equal to n.MC; otherwise,
##' the exact formula is used
##' @param ... potential further arguments, passed to <acop> @dacopula()
##' @return negative log-likelihood
##' @author Martin Maechler (Marius originally)
<- function(theta, acop, u, n.MC=0, ...) { # -(log-likelihood)
mLogL -sum(acop@dacopula(u, theta, n.MC=n.MC, log=TRUE, ...))
}
##' @title Plotting the Negative Log-Likelihood for Archimedean Copulas
##' @param cop an outer_nacopula (currently with no children)
##' @param u n x d data matrix
##' @param xlim x-range for curve() plotting
##' @param main title for curve()
##' @param XtrArgs a list of further arguments for mLogL()
##' @param ... further arguments for curve()
##' @return invisible()
##' @author Martin Maechler
<- function(cop, u, xlim, main, XtrArgs=list(), ...) {
curveLogL <- deparse(substitute(u))
unam stopifnot(is(cop, "outer_nacopula"), is.list(XtrArgs),
<- ncol(u)) >= 2, d == dim(cop),
(d length(cop@childCops) == 0# not yet *nested* A.copulas
)<- cop@copula
acop <- acop@theta # the true theta
th. <- setTheta(acop, NA) # so it's clear, the true theta is not used below
acop if(missing(main)) {
<- cop@copula@tau(th.)
tau. <- substitute("Neg. Log Lik."~ -italic(l)(theta, UU) ~ TXT ~~
main FUN(theta['*'] == Th) %=>% tau['*'] == Tau,
list(UU = unam,
TXT= sprintf("; n=%d, d=%d; A.cop",
nrow(u), d),
FUN = acop@name,
Th = format(th.,digits=3),
Tau = format(tau., digits=3)))
}<- curve(do.call(Vectorize(mLogL, "theta"), c(list(x, acop, u), XtrArgs)),
r xlim=xlim, main=main,
xlab = expression(theta),
ylab = substitute(- log(L(theta, u, ~~ COP)), list(COP=acop@name)),
...)if(is.finite(th.))
axis(1, at = th., labels=expression(theta["*"]),
lwd=2, col="dark gray", tck = -1/30)
else warning("non-finite cop@copula@theta = ", th.)
axis(1, at = initOpt(acop@name),
labels = FALSE, lwd = 2, col = 2, tck = 1/20)
invisible(r)
}
Ensure that we are told about it, if the numerical algorithms choose methods using Rmpfr
(R package interfacing to multi precision arithmetic MPFR):
<- options("copula:verboseUsingRmpfr"=TRUE) # see when "Rmpfr" methods are chosen automatically op
<- 200
n <- 100
d <- 0.2
tau <- copJoe@iTau(tau)
theta <- onacopulaL("Joe", list(theta,1:d))
cop theta
## [1] 1.443824
Here, the three different methods work “the same”:
set.seed(1)
<- rnacopula(n,cop)
U1 enacopula(U1, cop, "mle") # 1.432885 -- fine
## [1] 1.432898
<- 1 + (1:4)/4
th4 <- c(-3558.5, -3734.4, -3299.5, -2505.)
mL.tr <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log.poly")) # default
mLt1 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="log1p"))
mLt2 <- sapply(th4, function(th) mLogL(th, cop@copula, U1, method="poly"))
mLt3 stopifnot(all.equal(mLt1, mL.tr, tolerance=5e-5),
all.equal(mLt2, mL.tr, tolerance=5e-5),
all.equal(mLt3, mL.tr, tolerance=5e-5))
system.time(r1l <- curveLogL(cop, U1, c(1, 2.5), X=list(method="log.poly")))
## User System verstrichen
## 0.430 0.001 0.409
mtext("all three polyJ() methods on top of each other")
system.time({
<- curveLogL(cop, U1, c(1, 2.5), X=list(method="poly"),
r1J add=TRUE, col=adjustcolor("red", .4))
<- curveLogL(cop, U1, c(1, 2.5), X=list(method="log1p"),
r1m add=TRUE, col=adjustcolor("blue",.5))
})
## User System verstrichen
## 0.624 0.000 0.627
<- rnacopula(n,cop)
U2 summary(dCopula(U2, cop)) # => density for the *correct* parameter looks okay
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 4.900e+01 6.430e+02 2.777e+175 1.932e+04 5.553e+177
## hmm: max = 5.5e177
if(doExtras)
system.time(r2 <- curveLogL(cop, U2, c(1, 2.5)))
stopifnot(all.equal(enacopula(U2, cop, "mle"), 1.43992755, tolerance=1e-5),
all.equal(mLogL(1.8, cop@copula, U2), -4070.1953,tolerance=1e-5)) # (was -Inf)
<- rnacopula(n,cop)
U3 <- enacopula(U3, cop, "mle")) # 1.4495 (th.
## [1] 1.449569
system.time(r3 <- curveLogL(cop, U3, c(1, 2.5)))
## User System verstrichen
## 0.316 0.000 0.317
axis(1, at = th., label = quote(hat(theta)))
<- rnacopula(n,cop)
U4 enacopula(U4, cop, "mle") # 1.4519 (prev. was 2.351 : "completely wrong")
## [1] 1.451916
summary(dCopula(U4, cop)) # ok (had one Inf)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 7.500e+01 9.080e+02 1.981e+259 1.434e+04 3.961e+261
if(doExtras)
system.time(r4 <- curveLogL(cop, U4, c(1, 2.5)))
mLogL(2.2351, cop@copula, U4)
## [1] -1789.59
mLogL(1.5, cop@copula, U4)
## [1] -3882.819
mLogL(1.2, cop@copula, U4)
## [1] -3517.366
if(doExtras) # each curve takes almost 2 sec
system.time({
curveLogL(cop, U4, c(1, 1.01))
curveLogL(cop, U4, c(1, 1.0001))
curveLogL(cop, U4, c(1, 1.000001))
})## --> limit goes *VERY* steeply up to 0
## --> theta 1.164 is about the boundary:
stopifnot(identical(setTheta(cop, 1.164), onacopula(cop@copula, C(1.164, 1:100))),
all.equal(600.59577,
@copula@dacopula(U4[118,,drop=FALSE],
coptheta=1.164, log = TRUE), tolerance=1e-5)) # was "Inf"
<- 200
n <- 150
d <- 0.3
tau <- copJoe@iTau(tau)) (theta
## [1] 1.772108
<- onacopulaL("Joe",list(theta,1:d)) cop
set.seed(47)
<- rnacopula(n,cop)
U. enacopula(U., cop, "mle") # 1.784578
## [1] 1.78459
system.time(r. <- curveLogL(cop, U., c(1.1, 3)))
## User System verstrichen
## 0.468 0.000 0.471
## => still looks very good
<- 180
d <- 0.4
tau <- copJoe@iTau(tau)) (theta
## [1] 2.219066
<- onacopulaL("Joe",list(theta,1:d)) cop
<- rnacopula(n,cop)
U. enacopula(U., cop, "mle") # 2.217582
## [1] 2.217593
if(doExtras)
system.time(r. <- curveLogL(cop, U., c(1.1, 4)))
## => still looks very good
<- 200
n <- 50 # smaller 'd' -- so as to not need 'Rmpfr' here
d <- 0.2
tau <- copGumbel@iTau(tau)) (theta
## [1] 1.25
<- onacopulaL("Gumbel",list(theta,1:d)) cop
set.seed(1)
<- rnacopula(n,cop)
U1 if(doExtras) {
<- rnacopula(n,cop)
U2 <- rnacopula(n,cop)
U3
}enacopula(U1, cop, "mle") # 1.227659 (was 1.241927)
## [1] 1.227659
##--> Plots with "many" likelihood evaluations
system.time(r1 <- curveLogL(cop, U1, c(1, 2.1)))
## User System verstrichen
## 0.422 0.000 0.423
if(doExtras) system.time({
mtext("and two other generated samples")
<- curveLogL(cop, U2, c(1, 2.1), add=TRUE)
r2 <- curveLogL(cop, U3, c(1, 2.1), add=TRUE)
r3 })
<- 150
d <- 0.6
tau <- copGumbel@iTau(tau)) (theta
## [1] 2.5
.5 <- onacopulaL("Gumbel",list(theta,1:d)) cG
set.seed(17)
<- rnacopula(n,cG.5)
U4 <- rnacopula(n,cG.5)
U5 <- rnacopula(n,cG.5)
U6 if(doExtras) { ## "Rmpfr" is used {2012-06-21}: -- therefore about 18 seconds!
<- if(interactive()) 1e-12 else 1e-8
tol print(system.time(
<- c(enacopula(U4, cG.5, "mle", tol=tol),
ee. enacopula(U5, cG.5, "mle", tol=tol),
enacopula(U6, cG.5, "mle", tol=tol))))
dput(ee.)# in case the following fails
## tol=1e-12 Linux nb-mm3 3.2.0-25-generic x86_64 (2012-06-23):
## c(2.47567251789004, 2.48424484287686, 2.50410767129408)
## c(2.475672518, 2.484244763, 2.504107671),
stopifnot(all.equal(ee., c(2.475672518, 2.484244763, 2.504107671),
tolerance= max(1e-7, 16*tol)))
}## --> Plots with "many" likelihood evaluations
<- seq(1, 3, by= 1/4)
th. if(doExtras) # "default2012" (polyG default) partly uses Rmpfr here:
system.time(r4 <- sapply(th., mLogL, acop=cG.5@copula, u=U4))## 25.6 sec
## whereas this (polyG method) is very fast {and still ok}:
system.time(r4.p <- sapply(th., mLogL, acop=cG.5@copula, u=U4, method="pois"))
## User System verstrichen
## 0.099 0.000 0.100
<- c(0, -18375.33, -21948.033, -24294.995, -25775.502,
r4. -26562.609, -26772.767, -26490.809, -25781.224)
stopifnot(!doExtras ||
all.equal(r4, r4., tolerance = 8e-8),
all.equal(r4.p, r4., tolerance = 8e-8))
## --> use fast method here as well:
system.time(r5.p <- sapply(th., mLogL, acop=cG.5@copula, u=U5, method="pois"))
## User System verstrichen
## 0.101 0.000 0.102
system.time(r6.p <- sapply(th., mLogL, acop=cG.5@copula, u=U6, method="pois"))
## User System verstrichen
## 0.103 0.000 0.104
if(doExtras) {
if(FALSE) # for speed analysis, etc
debug(copula:::polyG)
mLogL(1.65, cG.5@copula, U4) # -23472.96
}<- dCopula(U4, setTheta(cG.5, 1.64), log = TRUE,
dd method = if(doExtras)"default" else "pois")
summary(dd)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 41.59 53.30 81.09 116.91 137.54 707.13
stopifnot(!is.na(dd), # no NaN's anymore
40 < range(dd), range(dd) < 710)
<- 64
n <- 5
d <- 0.8
tau <- copFrank@iTau(tau)) (theta
## [1] 18.19154
<- onacopulaL("Frank", list(theta,1:d)) cop
set.seed(11) # these seeds give no problems: 101, 41, 21
<- rnacopula(n,cop)
U. @copula <- setTheta(cop@copula, NA) # forget the true theta
copsystem.time(f.ML <- emle(U., cop)); f.ML # --> fine: theta = 18.033, Log-lik = 314.01
## User System verstrichen
## 0.013 0.000 0.013
##
## Call:
## bbmle::mle2(minuslogl = nLL, start = start, optimizer = "optimize",
## lower = interval[1], upper = interval[2])
##
## Coefficients:
## theta
## 18.0333
##
## Log-likelihood: 314.01
if(doExtras)
system.time(f.mlMC <- emle(U., cop, n.MC = 1e4)) # with MC
stopifnot(all.equal(unname(coef(f.ML)), 18.03331, tolerance= 1e-6),
all.equal(f.ML@min, -314.0143, tolerance=1e-6),
!doExtras || ## Simulate MLE (= SMLE) is "extra" random, hmm...
all.equal(unname(coef(f.mlMC)), 17.8, tolerance= 0.01)
## 64-bit ubuntu: 17.817523
## ? 64-bit Mac: 17.741
)
@copula <- setTheta(cop@copula, theta)
cop<- curveLogL(cop, U., c(1, 200)) # => now looks fine r.
tail(as.data.frame(r.), 15)
## x y
## 87 172.14 2105.690
## 88 174.13 2143.642
## 89 176.12 2181.637
## 90 178.11 2219.675
## 91 180.10 2257.754
## 92 182.09 2295.874
## 93 184.08 2334.034
## 94 186.07 2372.232
## 95 188.06 2410.468
## 96 190.05 2448.742
## 97 192.04 2487.051
## 98 194.03 2525.396
## 99 196.02 2563.776
## 100 198.01 2602.189
## 101 200.00 2640.636
stopifnot( is.finite( r.$y ),
## and is convex (everywhere):
diff(r.$y, d=2) > 0)
options(op) # revert to previous state
## R version 4.2.2 Patched (2022-11-13 r83349)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora Linux 36 (Thirty Six)
##
## Matrix products: default
## BLAS: /u/maechler/R/D/r-patched/F36-64-inst/lib/libRblas.so
## LAPACK: /u/maechler/R/D/r-patched/F36-64-inst/lib/libRlapack.so
##
## attached base packages:
## [1] parallel grid stats4 tools stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] rugarch_1.4-9 gsl_2.1-7.1 mev_1.14 lattice_0.20-45
## [5] bbmle_1.0.25 copula_1.1-1
##
## loaded via a namespace (and not attached):
## [1] zoo_1.8-11 xfun_0.35
## [3] bslib_0.4.1 partitions_1.10-7
## [5] ks_1.13.5 pcaPP_2.0-3
## [7] SkewHyperbolic_0.4-0 htmltools_0.5.3
## [9] gmp_0.6-8 yaml_2.3.6
## [11] pracma_2.4.2 rlang_1.0.6
## [13] jquerylib_0.1.4 stringr_1.4.1
## [15] pspline_1.0-19 bdsmatrix_1.3-6
## [17] mvtnorm_1.1-3 evaluate_0.18
## [19] knitr_1.40 fastmap_1.1.0
## [21] ADGofTest_0.3 evd_2.3-6.1
## [23] highr_0.9 xts_0.12.2
## [25] Rcpp_1.0.9 polynom_1.4-1
## [27] KernSmooth_2.23-20 cachem_1.0.6
## [29] DistributionUtils_0.6-0 jsonlite_1.8.3
## [31] truncnorm_1.0-8 alabama_2022.4-1
## [33] spd_2.0-1 digest_0.6.30
## [35] stringi_1.7.8 rbibutils_2.2.10
## [37] mathjaxr_1.6-0 numDeriv_2016.8-1.1
## [39] Runuran_0.37 Rdpack_2.4
## [41] stabledist_0.7-1 cli_3.4.1
## [43] magrittr_2.0.3 sass_0.4.2
## [45] nleqslv_3.3.3 Rsolnp_1.16
## [47] GeneralizedHyperbolic_0.8-4 MASS_7.3-58.1
## [49] Matrix_1.5-3 rmarkdown_2.18
## [51] mclust_6.0.0 R6_2.5.1
## [53] compiler_4.2.2