bigsimr
is an R package for simulating high-dimensional
multivariate data with a target correlation and arbitrary marginal
distributions via Gaussian copula. It utilizes Bigsimr.jl for
its core routines. For full documentation and examples, please see the
Bigsimr.jl
docs.
You can install the release version of the package from GitHub:
remotes::install_github("SchisslerGroup/r-bigsimr")
To get a bug fix or to use a new feature, you can install the development version from GitHub:
remotes::install_github("SchisslerGroup/r-bigsimr", ref="develop")
Note that the first invocation of
bigsimr::bigsimr_setup()
will install both Julia and the
required packages if they are missing. If you wish to have it use an
existing Julia binary, make sure that julia
is found in the
path. For more information see the julia_setup()
function
from JuliaCall.
library(bigsimr)
Sys.setenv(JULIA_NUM_THREADS = parallel::detectCores()) # activate multithreading
bs <- bigsimr_setup()
dist <- distributions_setup()
set.seed(2020-02-28)
Pearson matching
(target_corr <- bs$cor_randPD(3))
#> [,1] [,2] [,3]
#> [1,] 1.0000000 -0.3566613 -0.4686234
#> [2,] -0.3566613 1.0000000 0.6501013
#> [3,] -0.4686234 0.6501013 1.0000000
margins <- c(dist$Binomial(20, 0.2), dist$Beta(2, 3), dist$LogNormal(3, 1))
(adjusted_corr <- bs$pearson_match(target_corr, margins))
#> [,1] [,2] [,3]
#> [1,] 1.0000000 -0.3670106 -0.6537634
#> [2,] -0.3670106 1.0000000 0.8431715
#> [3,] -0.6537634 0.8431715 1.0000000
x <- bs$rvec(100000, adjusted_corr, margins)
bs$cor(x, bs$Pearson)
#> [,1] [,2] [,3]
#> [1,] 1.0000000 -0.3592258 -0.4693409
#> [2,] -0.3592258 1.0000000 0.6483799
#> [3,] -0.4693409 0.6483799 1.0000000
Spearman/Kendall matching
(spearman_corr <- bs$cor_randPD(3))
#> [,1] [,2] [,3]
#> [1,] 1.0000000 0.5696526 -0.5277236
#> [2,] 0.5696526 1.0000000 -0.1333224
#> [3,] -0.5277236 -0.1333224 1.0000000
(adjusted_corr <- bs$cor_convert(spearman_corr, bs$Spearman, bs$Pearson))
#> [,1] [,2] [,3]
#> [1,] 1.0000000 0.5877329 -0.5456254
#> [2,] 0.5877329 1.0000000 -0.1395015
#> [3,] -0.5456254 -0.1395015 1.0000000
x <- bs$rvec(100000, adjusted_corr, margins)
bs$cor(x, bs$Spearman)
#> [,1] [,2] [,3]
#> [1,] 1.0000000 0.5633441 -0.5227178
#> [2,] 0.5633441 1.0000000 -0.1322200
#> [3,] -0.5227178 -0.1322200 1.0000000
Nearest correlation matrix
s <- bs$cor_randPSD(200)
r <- bs$cor_convert(s, bs$Spearman, bs$Pearson)
bs$iscorrelation(r)
#> [1] FALSE
p <- bs$cor_nearPD(r)
bs$iscorrelation(p)
#> [1] TRUE
Fast approximate nearest correlation matrix
s = bs$cor_randPSD(2000)
r = bs$cor_convert(s, bs$Spearman, bs$Pearson)
bs$iscorrelation(r)
#> [1] FALSE
p = bs$cor_fastPD(r)
bs$iscorrelation(p)
#> [1] TRUE
This package is just a wrapper for the Julia package. Please file any bug reports or feature requests over at the Bigsimr.jl package repo.